// SPDX-License-Identifier: MPL-2.0 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) #include #include #include #include #include #include #include #include #include #include "shader_manager.h" static constexpr bool DumpShaders{false}; namespace Shader::Log { void Debug(const std::string &message) { skyline::Logger::Write(skyline::Logger::LogLevel::Debug, message); } void Warn(const std::string &message) { skyline::Logger::Write(skyline::Logger::LogLevel::Warn, message); } void Error(const std::string &message) { skyline::Logger::Write(skyline::Logger::LogLevel::Error, message); } } namespace skyline::gpu { void ShaderManager::LoadShaderReplacements(std::string_view replacementDir) { std::filesystem::path replacementDirPath{replacementDir}; if (std::filesystem::exists(replacementDirPath)) { for (const auto &entry : std::filesystem::directory_iterator{replacementDirPath}) { if (entry.is_regular_file()) { // Parse hash from filename auto path{entry.path()}; auto &replacementMap{path.extension().string() == ".spv" ? hostShaderReplacements : guestShaderReplacements}; u64 hash{std::stoull(path.stem().string(), nullptr, 16)}; auto it{replacementMap.insert({hash, {}})}; // Read file into map entry std::ifstream file{entry.path(), std::ios::binary | std::ios::ate}; it.first->second.resize(static_cast(file.tellg())); file.seekg(0, std::ios::beg); file.read(reinterpret_cast(it.first->second.data()), static_cast(it.first->second.size())); } } } } span ShaderManager::ProcessShaderBinary(bool spv, u64 hash, span binary) { auto &replacementMap{spv ? hostShaderReplacements : guestShaderReplacements}; auto it{replacementMap.find(hash)}; if (it != replacementMap.end()) { Logger::Info("Replacing shader with hash: 0x{:X}", hash); return it->second; } if (DumpShaders) { std::scoped_lock lock{dumpMutex}; auto shaderPath{dumpPath / fmt::format("{:016X}{}", hash, spv ? ".spv" : "")}; if (!std::filesystem::exists(shaderPath)) { std::ofstream file{shaderPath, std::ios::binary}; file.write(reinterpret_cast(binary.data()), static_cast(binary.size())); } } return binary; } ShaderManager::ShaderManager(const DeviceState &state, GPU &gpu, std::string_view replacementDir, std::string_view dumpDir) : gpu{gpu}, dumpPath{dumpDir} { LoadShaderReplacements(replacementDir); if constexpr (DumpShaders) { if (!std::filesystem::exists(dumpPath)) std::filesystem::create_directories(dumpPath); } auto &traits{gpu.traits}; hostTranslateInfo = Shader::HostTranslateInfo{ .support_float16 = traits.supportsFloat16, .support_int64 = traits.supportsInt64, .needs_demote_reorder = false, .support_snorm_render_buffer = true, .support_viewport_index_layer = gpu.traits.supportsShaderViewportIndexLayer, .min_ssbo_alignment = traits.minimumStorageBufferAlignment, .support_geometry_shader_passthrough = false }; constexpr u32 TegraX1WarpSize{32}; //!< The amount of threads in a warp on the Tegra X1 profile = Shader::Profile{ .supported_spirv = traits.supportsSpirv14 ? 0x00010400U : 0x00010000U, .unified_descriptor_binding = true, .support_descriptor_aliasing = !traits.quirks.brokenDescriptorAliasing, .support_int8 = traits.supportsInt8, .support_int16 = traits.supportsInt16, .support_int64 = traits.supportsInt64, .support_vertex_instance_id = false, .support_float_controls = traits.supportsFloatControls, .support_separate_denorm_behavior = traits.floatControls.denormBehaviorIndependence == vk::ShaderFloatControlsIndependence::eAll, .support_separate_rounding_mode = traits.floatControls.roundingModeIndependence == vk::ShaderFloatControlsIndependence::eAll, .support_fp16_denorm_preserve = static_cast(traits.floatControls.shaderDenormPreserveFloat16), .support_fp32_denorm_preserve = static_cast(traits.floatControls.shaderDenormPreserveFloat32), .support_fp16_denorm_flush = static_cast(traits.floatControls.shaderDenormFlushToZeroFloat16), .support_fp32_denorm_flush = static_cast(traits.floatControls.shaderDenormFlushToZeroFloat32), .support_fp16_signed_zero_nan_preserve = static_cast(traits.floatControls.shaderSignedZeroInfNanPreserveFloat16), .support_fp32_signed_zero_nan_preserve = static_cast(traits.floatControls.shaderSignedZeroInfNanPreserveFloat32), .support_fp64_signed_zero_nan_preserve = static_cast(traits.floatControls.shaderSignedZeroInfNanPreserveFloat64), .support_explicit_workgroup_layout = false, .support_vote = traits.supportsSubgroupVote, .support_viewport_index_layer_non_geometry = traits.supportsShaderViewportIndexLayer, .support_viewport_mask = false, .support_typeless_image_loads = traits.supportsImageReadWithoutFormat, .support_demote_to_helper_invocation = traits.supportsShaderDemoteToHelper, .support_int64_atomics = traits.supportsAtomicInt64, .support_derivative_control = true, .support_geometry_shader_passthrough = false, .support_native_ndc = false, .warp_size_potentially_larger_than_guest = TegraX1WarpSize < traits.subgroupSize, .lower_left_origin_mode = false, .need_declared_frag_colors = false, .has_broken_spirv_position_input = traits.quirks.brokenSpirvPositionInput, .has_broken_spirv_subgroup_mask_vector_extract_dynamic = traits.quirks.brokenSubgroupMaskExtractDynamic, .has_broken_spirv_subgroup_shuffle = traits.quirks.brokenSubgroupShuffle, .max_subgroup_size = traits.subgroupSize, .disable_subgroup_shuffle = *state.settings->disableSubgroupShuffle }; Shader::Settings::values = { #ifdef NDEBUG .renderer_debug = false, .disable_shader_loop_safety_checks = false, #else .renderer_debug = true, .disable_shader_loop_safety_checks = true, #endif .resolution_info = { .active = false, }, }; } /** * @brief A shader environment for all graphics pipeline stages */ class GraphicsEnvironment : public Shader::Environment { private: span binary; u32 baseOffset; u32 textureBufferIndex; bool viewportTransformEnabled; ShaderManager::ConstantBufferRead constantBufferRead; ShaderManager::GetTextureType getTextureType; public: GraphicsEnvironment(const std::array &postVtgShaderAttributeSkipMask, Shader::Stage pStage, span pBinary, u32 baseOffset, u32 textureBufferIndex, bool viewportTransformEnabled, ShaderManager::ConstantBufferRead constantBufferRead, ShaderManager::GetTextureType getTextureType) : binary{pBinary}, baseOffset{baseOffset}, textureBufferIndex{textureBufferIndex}, viewportTransformEnabled{viewportTransformEnabled}, constantBufferRead{std::move(constantBufferRead)}, getTextureType{std::move(getTextureType)} { gp_passthrough_mask = postVtgShaderAttributeSkipMask; stage = pStage; sph = *reinterpret_cast(binary.data()); start_address = baseOffset; is_propietary_driver = textureBufferIndex == 2; } [[nodiscard]] u64 ReadInstruction(u32 address) final { address -= baseOffset; if (binary.size() < (address + sizeof(u64))) throw exception("Out of bounds instruction read: 0x{:X}", address); return *reinterpret_cast(binary.data() + address); } [[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final { return constantBufferRead(index, offset); } [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final { throw exception("ReadTexturePixelFormat not implemented"); } [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final { return getTextureType(handle); } [[nodiscard]] u32 ReadViewportTransformState() final { return viewportTransformEnabled ? 1 : 0; // Only relevant for graphics shaders } [[nodiscard]] u32 TextureBoundBuffer() const final { return textureBufferIndex; } [[nodiscard]] u32 LocalMemorySize() const final { return static_cast(sph.LocalMemorySize()) + sph.common3.shader_local_memory_crs_size; } [[nodiscard]] u32 SharedMemorySize() const final { return 0; // Only relevant for compute shaders } [[nodiscard]] std::array WorkgroupSize() const final { return {0, 0, 0}; // Only relevant for compute shaders } [[nodiscard]] bool HasHLEMacroState() const final { return false; } [[nodiscard]] std::optional GetReplaceConstBuffer(u32 bank, u32 offset) final { return std::nullopt; } void Dump(u64 hash) final {} }; /** * @brief A shader environment for all compute pipeline stages */ class ComputeEnvironment : public Shader::Environment { private: span binary; u32 baseOffset; u32 textureBufferIndex; u32 localMemorySize; u32 sharedMemorySize; std::array workgroupDimensions; ShaderManager::ConstantBufferRead constantBufferRead; ShaderManager::GetTextureType getTextureType; public: ComputeEnvironment(span pBinary, u32 baseOffset, u32 textureBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array workgroupDimensions, ShaderManager::ConstantBufferRead constantBufferRead, ShaderManager::GetTextureType getTextureType) : binary{pBinary}, baseOffset{baseOffset}, textureBufferIndex{textureBufferIndex}, localMemorySize{localMemorySize}, sharedMemorySize{sharedMemorySize}, workgroupDimensions{workgroupDimensions}, constantBufferRead{std::move(constantBufferRead)}, getTextureType{std::move(getTextureType)} { stage = Shader::Stage::Compute; start_address = baseOffset; is_propietary_driver = textureBufferIndex == 2; } [[nodiscard]] u64 ReadInstruction(u32 address) final { address -= baseOffset; if (binary.size() < (address + sizeof(u64))) throw exception("Out of bounds instruction read: 0x{:X}", address); return *reinterpret_cast(binary.data() + address); } [[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final { return constantBufferRead(index, offset); } [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final { throw exception("ReadTexturePixelFormat not implemented"); } [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final { return getTextureType(handle); } [[nodiscard]] u32 ReadViewportTransformState() final { return 0; // Only relevant for graphics shaders } [[nodiscard]] u32 TextureBoundBuffer() const final { return textureBufferIndex; } [[nodiscard]] u32 LocalMemorySize() const final { return localMemorySize; } [[nodiscard]] u32 SharedMemorySize() const final { return sharedMemorySize; } [[nodiscard]] std::array WorkgroupSize() const final { return workgroupDimensions; } [[nodiscard]] bool HasHLEMacroState() const final { return false; } [[nodiscard]] std::optional GetReplaceConstBuffer(u32 bank, u32 offset) final { return std::nullopt; } void Dump(u64 hash) final {} }; /** * @brief A shader environment for VertexB during combination as it only requires the shader header and no higher level context */ class VertexBEnvironment : public Shader::Environment { public: explicit VertexBEnvironment(span binary) { sph = *reinterpret_cast(binary.data()); stage = Shader::Stage::VertexB; } [[nodiscard]] u64 ReadInstruction(u32 address) final { throw exception("Not implemented"); } [[nodiscard]] u32 ReadCbufValue(u32 index, u32 offset) final { throw exception("Not implemented"); } [[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) final { throw exception("Not implemented"); } [[nodiscard]] Shader::TexturePixelFormat ReadTexturePixelFormat(u32 handle) final { throw exception("Not implemented"); } [[nodiscard]] u32 ReadViewportTransformState() final { throw exception("Not implemented"); } [[nodiscard]] u32 TextureBoundBuffer() const final { throw exception("Not implemented"); } [[nodiscard]] u32 LocalMemorySize() const final { return static_cast(sph.LocalMemorySize()) + sph.common3.shader_local_memory_crs_size; } [[nodiscard]] u32 SharedMemorySize() const final { return 0; // Only relevant for compute shaders } [[nodiscard]] std::array WorkgroupSize() const final { return {0, 0, 0}; // Only relevant for compute shaders } [[nodiscard]] bool HasHLEMacroState() const final { return false; } [[nodiscard]] std::optional GetReplaceConstBuffer(u32 bank, u32 offset) final { return std::nullopt; } void Dump(u64 hash) final {} }; Shader::IR::Program ShaderManager::ParseGraphicsShader(const std::array &postVtgShaderAttributeSkipMask, Shader::Stage stage, u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, bool viewportTransformEnabled, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { binary = ProcessShaderBinary(false, hash, binary); std::scoped_lock lock{poolMutex}; GraphicsEnvironment environment{postVtgShaderAttributeSkipMask, stage, binary, baseOffset, textureConstantBufferIndex, viewportTransformEnabled, constantBufferRead, getTextureType}; Shader::Maxwell::Flow::CFG cfg{environment, flowBlockPool, Shader::Maxwell::Location{static_cast(baseOffset + sizeof(Shader::ProgramHeader))}}; return Shader::Maxwell::TranslateProgram(instructionPool, blockPool, environment, cfg, hostTranslateInfo); } Shader::IR::Program ShaderManager::CombineVertexShaders(Shader::IR::Program &vertexA, Shader::IR::Program &vertexB, span vertexBBinary) { std::scoped_lock lock{poolMutex}; VertexBEnvironment env{vertexBBinary}; return Shader::Maxwell::MergeDualVertexPrograms(vertexA, vertexB, env); } Shader::IR::Program ShaderManager::GenerateGeometryPassthroughShader(Shader::IR::Program &layerSource, Shader::OutputTopology topology) { std::scoped_lock lock{poolMutex}; return Shader::Maxwell::GenerateGeometryPassthrough(instructionPool, blockPool, hostTranslateInfo, layerSource, topology); } Shader::IR::Program ShaderManager::ParseComputeShader(u64 hash, span binary, u32 baseOffset, u32 textureConstantBufferIndex, u32 localMemorySize, u32 sharedMemorySize, std::array workgroupDimensions, const ConstantBufferRead &constantBufferRead, const GetTextureType &getTextureType) { binary = ProcessShaderBinary(false, hash, binary); std::scoped_lock lock{poolMutex}; ComputeEnvironment environment{binary, baseOffset, textureConstantBufferIndex, localMemorySize, sharedMemorySize, workgroupDimensions, constantBufferRead, getTextureType}; Shader::Maxwell::Flow::CFG cfg{environment, flowBlockPool, Shader::Maxwell::Location{static_cast(baseOffset)}}; return Shader::Maxwell::TranslateProgram(instructionPool, blockPool, environment, cfg, hostTranslateInfo); } vk::ShaderModule ShaderManager::CompileShader(const Shader::RuntimeInfo &runtimeInfo, Shader::IR::Program &program, Shader::Backend::Bindings &bindings, u64 hash) { std::scoped_lock lock{poolMutex}; if (program.info.loads.Legacy() || program.info.stores.Legacy()) Shader::Maxwell::ConvertLegacyToGeneric(program, runtimeInfo); auto spirvEmitted{Shader::Backend::SPIRV::EmitSPIRV(profile, runtimeInfo, program, bindings)}; auto spirv{ProcessShaderBinary(true, hash, span{spirvEmitted}.cast()).cast()}; vk::ShaderModuleCreateInfo createInfo{ .pCode = spirv.data(), .codeSize = spirv.size_bytes(), }; return (*gpu.vkDevice).createShaderModule(createInfo, nullptr, *gpu.vkDevice.getDispatcher()); } void ShaderManager::ResetPools() { std::scoped_lock lock{poolMutex}; instructionPool.ReleaseContents(); blockPool.ReleaseContents(); flowBlockPool.ReleaseContents(); } }