diff --git a/app/src/main/cpp/skyline/gpu.cpp b/app/src/main/cpp/skyline/gpu.cpp index 17dee432..d60ac1e2 100644 --- a/app/src/main/cpp/skyline/gpu.cpp +++ b/app/src/main/cpp/skyline/gpu.cpp @@ -120,7 +120,7 @@ namespace skyline::gpu { } vk::raii::Device GPU::CreateDevice(const vk::raii::PhysicalDevice &physicalDevice, decltype(vk::DeviceQueueCreateInfo::queueCount) &vkQueueFamilyIndex, QuirkManager &quirks) { - auto deviceFeatures2{physicalDevice.getFeatures2()}; + auto deviceFeatures2{physicalDevice.getFeatures2()}; decltype(deviceFeatures2) enabledFeatures2{}; // We only want to enable features we required due to potential overhead from unused features #define FEAT_REQ(structName, feature) \ diff --git a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h index 4839ad96..b29306de 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/graphics_context.h @@ -11,6 +11,7 @@ #include #include "command_executor.h" +#include "types/tsc.h" namespace skyline::gpu::interconnect { namespace maxwell3d = soc::gm20b::engine::maxwell3d::type; @@ -1547,6 +1548,211 @@ namespace skyline::gpu::interconnect { private: u32 bindlessTextureConstantBufferIndex{}; + /* Samplers */ + private: + struct Sampler : public vk::raii::Sampler, public FenceCycleDependency { + using vk::raii::Sampler::Sampler; + }; + + struct SamplerPool { + IOVA iova; //!< The address of the sampler pool, it is written in two halves and translated through the GMMU when the pool is mapped + u32 maximumIndex; //!< The maximum index value written by the guest, it is used to size the mapping of the pool + span samplerControls; //!< A view over the pool's sampler descriptors, any change to the pool's address or maximum index resets it so that it is mapped again on the next lookup + std::unordered_map, util::ObjectHash> samplers; //!< A cache of the samplers that have been created, it is looked up using the contents of the sampler descriptor + } samplerPool{}; + + public: + void SetSamplerPoolIovaHigh(u32 high) { + samplerPool.iova.high = high; + samplerPool.samplerControls = nullptr; + } + + void SetSamplerPoolIovaLow(u32 low) { + samplerPool.iova.low = low; + samplerPool.samplerControls = nullptr; + } + + void SetSamplerPoolMaximumIndex(u32 index) { + samplerPool.maximumIndex = index; + samplerPool.samplerControls = nullptr; + } + + vk::Filter 
ConvertSamplerFilter(TextureSamplerControl::Filter filter) { + using TscFilter = TextureSamplerControl::Filter; + using VkFilter = vk::Filter; + switch (filter) { + // @fmt:off + + case TscFilter::Nearest: return VkFilter::eNearest; + case TscFilter::Linear: return VkFilter::eLinear; + + // @fmt:on + } + } + + vk::SamplerMipmapMode ConvertSamplerMipFilter(TextureSamplerControl::MipFilter filter) { + using TscFilter = TextureSamplerControl::MipFilter; + using VkMode = vk::SamplerMipmapMode; + switch (filter) { + // @fmt:off + + case TscFilter::None: return VkMode{}; + case TscFilter::Nearest: return VkMode::eNearest; + case TscFilter::Linear: return VkMode::eLinear; + + // @fmt:on + } + } + + vk::SamplerAddressMode ConvertSamplerAddressMode(TextureSamplerControl::AddressMode mode) { + using TscMode = TextureSamplerControl::AddressMode; + using VkMode = vk::SamplerAddressMode; + switch (mode) { + // @fmt:off + + case TscMode::Repeat: return VkMode::eRepeat; + case TscMode::MirroredRepeat: return VkMode::eMirroredRepeat; + + case TscMode::ClampToEdge: return VkMode::eClampToEdge; + case TscMode::ClampToBorder: return VkMode::eClampToBorder; + case TscMode::Clamp: return VkMode::eClampToEdge; // Vulkan doesn't support 'GL_CLAMP' so this is an approximation + + case TscMode::MirrorClampToEdge: return VkMode::eMirrorClampToEdge; + case TscMode::MirrorClampToBorder: return VkMode::eMirrorClampToEdge; // Only supported mirror clamps are to edges so this is an approximation + case TscMode::MirrorClamp: return VkMode::eMirrorClampToEdge; // Same as above + + // @fmt:on + } + } + + vk::CompareOp ConvertSamplerCompareOp(TextureSamplerControl::CompareOp compareOp) { + using TscOp = TextureSamplerControl::CompareOp; + using VkOp = vk::CompareOp; + switch (compareOp) { + // @fmt:off + + case TscOp::Never: return VkOp::eNever; + case TscOp::Less: return VkOp::eLess; + case TscOp::Equal: return VkOp::eEqual; + case TscOp::LessOrEqual: return VkOp::eLessOrEqual; + case 
TscOp::Greater: return VkOp::eGreater; + case TscOp::NotEqual: return VkOp::eNotEqual; + case TscOp::GreaterOrEqual: return VkOp::eGreaterOrEqual; + case TscOp::Always: return VkOp::eAlways; + + // @fmt:on + } + } + + vk::SamplerReductionMode ConvertSamplerReductionFilter(TextureSamplerControl::SamplerReduction reduction) { + using TscReduction = TextureSamplerControl::SamplerReduction; + using VkReduction = vk::SamplerReductionMode; + switch (reduction) { + // @fmt:off + + case TscReduction::WeightedAverage: return VkReduction::eWeightedAverage; + case TscReduction::Min: return VkReduction::eMin; + case TscReduction::Max: return VkReduction::eMax; + default: throw exception("Unsupported sampler reduction filter: {}", static_cast<u32>(reduction)); // Encoding 3 of the 2-bit guest field has no enumerator and must not fall off the end of the function + // @fmt:on + } + } + + vk::BorderColor ConvertBorderColorWithCustom(float red, float green, float blue, float alpha) { + if (alpha == 1.0f) { + if (red == 1.0f && green == 1.0f && blue == 1.0f) + return vk::BorderColor::eFloatOpaqueWhite; + else if (red == 0.0f && green == 0.0f && blue == 0.0f) + return vk::BorderColor::eFloatOpaqueBlack; + } else if (red == 0.0f && green == 0.0f && blue == 0.0f && alpha == 0.0f) { // Transparent black is (0, 0, 0, 0), a transparent white has no fixed equivalent and has to be a custom color + return vk::BorderColor::eFloatTransparentBlack; + } + + return vk::BorderColor::eFloatCustomEXT; + } + + vk::BorderColor ConvertBorderColorFixed(float red, float green, float blue, float alpha) { + if (alpha == 1.0f) { + if (red == 1.0f && green == 1.0f && blue == 1.0f) + return vk::BorderColor::eFloatOpaqueWhite; + else if (red == 0.0f && green == 0.0f && blue == 0.0f) + return vk::BorderColor::eFloatOpaqueBlack; + } else if (red == 0.0f && green == 0.0f && blue == 0.0f && alpha == 0.0f) { // Transparent black is (0, 0, 0, 0), any other color is approximated below + return vk::BorderColor::eFloatTransparentBlack; + } + + // Approximations of a custom color using fixed colors + if (red + green + blue > 1.0f) + return vk::BorderColor::eFloatOpaqueWhite; + else if (alpha > 0.0f) + return vk::BorderColor::eFloatOpaqueBlack; + else + return vk::BorderColor::eFloatTransparentBlack; + } + + std::shared_ptr GetSampler(u32 index) { + if (!samplerPool.samplerControls.valid()) { + auto 
mappings{channelCtx.asCtx->gmmu.TranslateRange(samplerPool.iova, (static_cast<size_t>(samplerPool.maximumIndex) + 1) * sizeof(TextureSamplerControl))}; // 'maximumIndex' is the last valid index rather than an entry count, so the pool spans one more descriptor than its value (the addition is done in 'size_t' to avoid wrapping a 32-bit value) + if (mappings.size() != 1) + throw exception("Sampler pool mapping count is unexpected: {}", mappings.size()); + samplerPool.samplerControls = mappings.front().cast(); + } + + TextureSamplerControl &samplerControl{samplerPool.samplerControls[index]}; + auto &sampler{samplerPool.samplers[samplerControl]}; + if (sampler) + return sampler; + + auto convertAddressModeWithCheck{[&](TextureSamplerControl::AddressMode mode) { + auto vkMode{ConvertSamplerAddressMode(mode)}; + if (vkMode == vk::SamplerAddressMode::eMirrorClampToEdge && !gpu.quirks.supportsSamplerMirrorClampToEdge) [[unlikely]] { + Logger::Warn("Cannot use Mirror Clamp To Edge as Sampler Address Mode without host GPU support"); + return vk::SamplerAddressMode::eClampToEdge; // We use a normal clamp to edge to approximate it + } + return vkMode; + }}; + + auto maxAnisotropy{samplerControl.MaxAnisotropy()}; + vk::StructureChain samplerInfo{ + vk::SamplerCreateInfo{ + .magFilter = ConvertSamplerFilter(samplerControl.magFilter), + .minFilter = ConvertSamplerFilter(samplerControl.minFilter), + .mipmapMode = ConvertSamplerMipFilter(samplerControl.mipFilter), + .addressModeU = convertAddressModeWithCheck(samplerControl.addressModeU), + .addressModeV = convertAddressModeWithCheck(samplerControl.addressModeV), + .addressModeW = convertAddressModeWithCheck(samplerControl.addressModeP), + .mipLodBias = samplerControl.MipLodBias(), + .anisotropyEnable = maxAnisotropy > 1.0f, + .maxAnisotropy = maxAnisotropy, + .compareEnable = samplerControl.depthCompareEnable, + .compareOp = ConvertSamplerCompareOp(samplerControl.depthCompareOp), + .minLod = samplerControl.MinLodClamp(), + .maxLod = samplerControl.MaxLodClamp(), + .unnormalizedCoordinates = false, + }, vk::SamplerReductionModeCreateInfoEXT{ + .reductionMode = ConvertSamplerReductionFilter(samplerControl.reductionFilter), + }, 
vk::SamplerCustomBorderColorCreateInfoEXT{ + .customBorderColor.float32 = {{samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA}}, + .format = vk::Format::eUndefined, /* No format is supplied, 'supportsCustomBorderColor' is only set when the host offers 'customBorderColorWithoutFormat' */ + }, + }; + + if (!gpu.quirks.supportsSamplerReductionMode) + samplerInfo.unlink(); + + vk::BorderColor &borderColor{samplerInfo.get().borderColor}; + if (gpu.quirks.supportsCustomBorderColor) { + borderColor = ConvertBorderColorWithCustom(samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA); + if (borderColor != vk::BorderColor::eFloatCustomEXT) /* A fixed border color matched exactly, presumably the custom color structure is what gets unlinked here (the template argument isn't visible) */ + samplerInfo.unlink(); + } else { + borderColor = ConvertBorderColorFixed(samplerControl.borderColorR, samplerControl.borderColorG, samplerControl.borderColorB, samplerControl.borderColorA); + samplerInfo.unlink(); + } + + return sampler = std::make_shared(gpu.vkDevice, samplerInfo.get()); + } + public: void SetBindlessTextureConstantBufferIndex(u32 index) { bindlessTextureConstantBufferIndex = index; diff --git a/app/src/main/cpp/skyline/gpu/interconnect/types/tsc.h b/app/src/main/cpp/skyline/gpu/interconnect/types/tsc.h new file mode 100644 index 00000000..bae3d25c --- /dev/null +++ b/app/src/main/cpp/skyline/gpu/interconnect/types/tsc.h @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: MPL-2.0 +// Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +// Copyright © 2018-2020 fincs (https://github.com/devkitPro/deko3d) + +#pragma once + +#include + +namespace skyline::gpu::interconnect { + /** + * @brief The Texture Sampler Control is a descriptor used to configure the texture sampler in Maxwell GPUs + * @url https://github.com/envytools/envytools/blob/master/rnndb/graph/g80_texture.xml#L367 + * @url https://github.com/devkitPro/deko3d/blob/00c12d1f4809014f1cc22719dd2e3476735eec64/source/maxwell/texture_sampler_control_block.h + */ + struct TextureSamplerControl { + enum class AddressMode : u32 { + Repeat = 
0, + MirroredRepeat = 1, + ClampToEdge = 2, + ClampToBorder = 3, + Clamp = 4, + MirrorClampToEdge = 5, + MirrorClampToBorder = 6, + MirrorClamp = 7, + }; + + enum class CompareOp : u32 { + Never = 0, + Less = 1, + Equal = 2, + LessOrEqual = 3, + Greater = 4, + NotEqual = 5, + GreaterOrEqual = 6, + Always = 7, + }; + + enum class Filter : u32 { + Nearest = 1, + Linear = 2, + }; + + enum class MipFilter : u32 { + None = 1, + Nearest = 2, + Linear = 3, + }; + + enum class SamplerReduction : u32 { + WeightedAverage = 0, + Min = 1, + Max = 2, + }; + + // 0x00 + AddressMode addressModeU : 3; + AddressMode addressModeV : 3; + AddressMode addressModeP : 3; + u32 depthCompareEnable : 1; + CompareOp depthCompareOp : 3; + u32 srgbConversion : 1; + u32 fontFilterWidth : 3; + u32 fontFilterHeight : 3; + u32 maxAnisotropy : 3; //!< An index into the anisotropy lookup table rather than the anisotropy itself, use MaxAnisotropy() to retrieve the value + u32 _pad0_ : 9; + + // 0x04 + Filter magFilter : 2; + u32 _pad1_ : 2; + Filter minFilter : 2; + MipFilter mipFilter : 2; + u32 cubemapAnisotropy : 1; + u32 cubemapInterfaceFiltering : 1; + SamplerReduction reductionFilter : 2; + signed int mipLodBias : 13; //!< A signed fixed-point value, use MipLodBias() to retrieve it as a float + u32 floatCoordNormalization : 1; + u32 trilinearOptimization : 5; + u32 _pad2_ : 1; + + // 0x08 + u32 minLodClamp : 12; //!< An unsigned fixed-point value, use MinLodClamp() to retrieve it as a float + u32 maxLodClamp : 12; //!< An unsigned fixed-point value, use MaxLodClamp() to retrieve it as a float + u32 srgbBorderColorR : 8; + + // 0x0C + u32 _pad3_ : 12; + u32 srgbBorderColorG : 8; + u32 srgbBorderColorB : 8; + u32 _pad4_ : 4; + + // 0x10 + float borderColorR; + + // 0x14 + float borderColorG; + + // 0x18 + float borderColorB; + + // 0x1C + float borderColorA; + + private: + /** + * @brief Converts a fixed-point value with 'FractionalBits' fractional bits into a floating point number + */ + template + float ConvertFixedToFloat(T fixed) { + return static_cast(fixed) / static_cast(1 << FractionalBits); + }; + + public: + bool operator==(const TextureSamplerControl&) const = default; + + float MaxAnisotropy() { + constexpr size_t AnisotropyCount{8}; //!< The amount of unique anisotropy values that can be represented (2^3 — 3-bit value) + constexpr std::array anisotropyLut{ + 1.0f, 3.14f, 
5.28f, 7.42f, 9.57f, 11.71f, 13.85f, 16.0f + }; //!< A linear mapping of value range (0..7) to anisotropy range (1..16) calculated using `(index * 15 / 7) + 1` + return anisotropyLut[maxAnisotropy]; + } + + float MipLodBias() { + return ConvertFixedToFloat(mipLodBias); + } + + float MinLodClamp() { + return ConvertFixedToFloat(minLodClamp); + } + + float MaxLodClamp() { + return ConvertFixedToFloat(maxLodClamp); + } + }; + static_assert(sizeof(TextureSamplerControl) == 0x20); +} diff --git a/app/src/main/cpp/skyline/gpu/quirk_manager.cpp b/app/src/main/cpp/skyline/gpu/quirk_manager.cpp index d46fb128..acabb596 100644 --- a/app/src/main/cpp/skyline/gpu/quirk_manager.cpp +++ b/app/src/main/cpp/skyline/gpu/quirk_manager.cpp @@ -5,7 +5,7 @@ namespace skyline::gpu { QuirkManager::QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2) { - bool hasShaderAtomicInt64{}, hasShaderFloat16Int8Ext{}; + bool hasCustomBorderColorExtension{}, hasShaderAtomicInt64{}, hasShaderFloat16Int8Ext{}; for (auto &extension : deviceExtensions) { #define EXT_SET(name, property) \ @@ -28,6 +28,9 @@ namespace skyline::gpu { auto extensionVersion{extension.specVersion}; switch (util::Hash(extensionName)) { EXT_SET("VK_EXT_index_type_uint8", supportsUint8Indices); + EXT_SET("VK_KHR_sampler_mirror_clamp_to_edge", supportsSamplerMirrorClampToEdge); // This is a KHR extension, there is no EXT variant of it so that name could never match + EXT_SET("VK_EXT_sampler_filter_minmax", supportsSamplerReductionMode); + EXT_SET("VK_EXT_custom_border_color", hasCustomBorderColorExtension); EXT_SET("VK_EXT_provoking_vertex", supportsLastProvokingVertex); EXT_SET("VK_EXT_vertex_attribute_divisor", supportsVertexAttributeDivisor); EXT_SET("VK_EXT_shader_viewport_index_layer", supportsShaderViewportIndexLayer); @@ -53,6 +56,16 @@ namespace skyline::gpu { FEAT_SET(vk::PhysicalDeviceFeatures2, features.shaderInt64, supportsInt64) 
FEAT_SET(vk::PhysicalDeviceFeatures2, features.shaderStorageImageReadWithoutFormat, supportsImageReadWithoutFormat) + if (hasCustomBorderColorExtension) { + bool hasCustomBorderColorFeature{}; + FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColors, hasCustomBorderColorFeature) + if (hasCustomBorderColorFeature) + // We only want to mark custom border colors as supported if it can be done without supplying a format + FEAT_SET(vk::PhysicalDeviceCustomBorderColorFeaturesEXT, customBorderColorWithoutFormat, supportsCustomBorderColor) + } else { + enabledFeatures2.unlink(); // The extension is unavailable, presumably this drops its feature structure from the chain of enabled features (the template argument isn't visible here) + } + if (supportsVertexAttributeDivisor) { FEAT_SET(vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vertexAttributeInstanceRateDivisor, supportsVertexAttributeDivisor) FEAT_SET(vk::PhysicalDeviceVertexAttributeDivisorFeaturesEXT, vertexAttributeInstanceRateZeroDivisor, supportsVertexAttributeZeroDivisor) @@ -85,6 +98,9 @@ namespace skyline::gpu { } std::string QuirkManager::Summary() { - return fmt::format("\n* Supports U8 Indices: {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", supportsUint8Indices, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsSubgroupVote, subgroupSize); + 
return fmt::format( + "\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}", + supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsSubgroupVote, subgroupSize + ); /* Every argument fills the "{}" placeholder at the same position in the format string, both lists have to be kept in the same order */ } } diff --git a/app/src/main/cpp/skyline/gpu/quirk_manager.h b/app/src/main/cpp/skyline/gpu/quirk_manager.h index 1f27468d..2cd88c04 100644 --- a/app/src/main/cpp/skyline/gpu/quirk_manager.h +++ b/app/src/main/cpp/skyline/gpu/quirk_manager.h @@ -12,7 +12,10 @@ namespace skyline::gpu { */ class QuirkManager { public: - bool supportsUint8Indices{}; //!< If the device supports using uint8 indices in index buffers + bool supportsUint8Indices{}; //!< If the device supports using uint8 indices in index buffers (with VK_EXT_index_type_uint8) + bool supportsSamplerMirrorClampToEdge{}; //!< If the device supports a mirrored clamp to edge as a sampler address mode (with VK_KHR_sampler_mirror_clamp_to_edge) + bool supportsSamplerReductionMode{}; //!< If the device 
supports explicitly specifying a reduction mode for sampling (with VK_EXT_sampler_filter_minmax) + bool supportsCustomBorderColor{}; //!< If the device supports a custom border color without supplying a format (with VK_EXT_custom_border_color) bool supportsLastProvokingVertex{}; //!< If the device supports setting the last vertex as the provoking vertex (with VK_EXT_provoking_vertex) bool supportsLogicOp{}; //!< If the device supports framebuffer logical operations during blending bool supportsVertexAttributeDivisor{}; //!< If the device supports a divisor for instance-rate vertex attributes (with VK_EXT_vertex_attribute_divisor) @@ -35,7 +38,7 @@ namespace skyline::gpu { using DeviceProperties2 = vk::StructureChain; - using DeviceFeatures2 = vk::StructureChain; + using DeviceFeatures2 = vk::StructureChain; QuirkManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector &deviceExtensions, std::vector> &enabledExtensions, const DeviceProperties2 &deviceProperties2); diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp index 2b84ac5a..600d74fe 100644 --- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp +++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp @@ -467,6 +467,16 @@ namespace skyline::soc::gm20b::engine::maxwell3d { context.SetBindlessTextureConstantBufferIndex(bindlessTextureConstantBufferIndex); }) + MAXWELL3D_STRUCT_STRUCT_CASE(samplerPool, address, high, { + context.SetSamplerPoolIovaHigh(high); + }) + MAXWELL3D_STRUCT_STRUCT_CASE(samplerPool, address, low, { + context.SetSamplerPoolIovaLow(low); + }) + MAXWELL3D_STRUCT_CASE(samplerPool, maximumIndex, { + context.SetSamplerPoolMaximumIndex(maximumIndex); + }) + default: break; }