diff --git a/app/CMakeLists.txt b/app/CMakeLists.txt
index d4c2ca7a..06a68a58 100644
--- a/app/CMakeLists.txt
+++ b/app/CMakeLists.txt
@@ -205,6 +205,7 @@ add_library(skyline SHARED
         ${source_DIR}/skyline/gpu/interconnect/maxwell_3d/packed_pipeline_state.cpp
         ${source_DIR}/skyline/gpu/interconnect/maxwell_3d/pipeline_manager.cpp
         ${source_DIR}/skyline/gpu/interconnect/maxwell_3d/constant_buffers.cpp
+        ${source_DIR}/skyline/gpu/interconnect/maxwell_3d/queries.cpp
         ${source_DIR}/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp
         ${source_DIR}/skyline/gpu/interconnect/kepler_compute/pipeline_manager.cpp
         ${source_DIR}/skyline/gpu/interconnect/kepler_compute/pipeline_state.cpp
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp
index a021f150..8b41a39b 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.cpp
@@ -24,7 +24,8 @@ namespace skyline::gpu::interconnect::maxwell3d {
           samplers{manager, registerBundle.samplerPoolRegisters},
           samplerBinding{registerBundle.samplerBinding},
           textures{manager, registerBundle.texturePoolRegisters},
-          directState{activeState.directState} {
+          directState{activeState.directState},
+          queries{gpu} {
         ctx.executor.AddFlushCallback([this] {
             if (attachedDescriptorSets) {
                 ctx.executor.AttachDependency(attachedDescriptorSets);
@@ -38,6 +39,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
             textures.MarkAllDirty();
             quadConversionBufferAttached = false;
             constantBuffers.DisableQuickBind();
+            queries.PurgeCaches(ctx);
         });
 
         ctx.executor.AddPipelineChangeCallback([this] {
@@ -415,4 +417,26 @@ namespace skyline::gpu::interconnect::maxwell3d {
         }, scissor, activeDescriptorSetSampledImages, {}, activeState.GetColorAttachments(), activeState.GetDepthAttachment(), !ctx.gpu.traits.quirks.relaxedRenderPassCompatibility, srcStageMask, dstStageMask);
         ctx.executor.AddCheckpoint("After indirect draw");
     }
+
+    void Maxwell3D::Query(soc::gm20b::IOVA address, engine::SemaphoreInfo::CounterType type, std::optional timestamp) {
+        if (type != engine::SemaphoreInfo::CounterType::SamplesPassed) {
+            Logger::Error("Unsupported query type: {}", static_cast(type));
+            return;
+        }
+
+        queries.Query(ctx, address, Queries::CounterType::Occulusion, timestamp);
+    }
+
+    void Maxwell3D::ResetCounter(engine::ClearReportValue::Type type) {
+        if (type != engine::ClearReportValue::Type::ZPassPixelCount) {
+            Logger::Error("Unsupported query type: {}", static_cast(type));
+            return;
+        }
+
+        queries.ResetCounter(ctx, Queries::CounterType::Occulusion);
+    }
+
+    bool Maxwell3D::QueryPresentAtAddress(soc::gm20b::IOVA address) {
+        return queries.QueryPresentAtAddress(address);
+    }
 }
\ No newline at end of file
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h
index ca095a58..fd6174e2 100644
--- a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/maxwell_3d.h
@@ -6,9 +6,11 @@
 #include
 #include
 #include
+#include
 #include "common.h"
 #include "active_state.h"
 #include "constant_buffers.h"
+#include "queries.h"
 
 namespace skyline::gpu::interconnect::maxwell3d {
     /**
@@ -50,6 +52,7 @@ namespace skyline::gpu::interconnect::maxwell3d {
         std::shared_ptr quadConversionBuffer{};
         bool quadConversionBufferAttached{};
         BufferView indirectBufferView;
+        Queries queries;
 
         static constexpr size_t DescriptorBatchSize{0x100};
         std::shared_ptr> attachedDescriptorSets;
@@ -105,5 +108,11 @@ namespace skyline::gpu::interconnect::maxwell3d {
         void Draw(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, u32 count, u32 first, u32 instanceCount, u32 vertexOffset, u32 firstInstance);
 
         void DrawIndirect(engine::DrawTopology topology, bool transformFeedbackEnable, bool indexed, span indirectBuffer, u32 count, u32 stride);
+
+        void Query(soc::gm20b::IOVA address, engine::SemaphoreInfo::CounterType type, std::optional timestamp);
+
+        void ResetCounter(engine::ClearReportValue::Type type);
+
+        bool QueryPresentAtAddress(soc::gm20b::IOVA address);
     };
 }
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.cpp b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.cpp
new file mode 100644
index 00000000..a41baae5
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.cpp
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#include
+#include
+#include
+#include "queries.h"
+
+namespace skyline::gpu::interconnect::maxwell3d {
+    Queries::Counter::Counter(vk::raii::Device &device, vk::QueryType type) : pool{device, vk::QueryPoolCreateInfo{
+        .queryType = type,
+        .queryCount = Counter::QueryPoolSize
+    }} {}
+
+    std::function &, GPU &)> Queries::Counter::Prepare(InterconnectContext &ctx) {
+        auto currentRenderPassIndex{*ctx.executor.GetRenderPassIndex()};
+        if (ctx.executor.executionTag != lastTag || lastRenderPassIndex != currentRenderPassIndex) {
+            lastTag = ctx.executor.executionTag;
+            lastRenderPassIndex = currentRenderPassIndex;
+
+            // Allocate fresh, zeroed per-RP tracking storage (query list, used-query counter, query-active flag)
+            queries = ctx.executor.allocator->AllocateUntracked(Counter::QueryPoolSize);
+            usedQueryCount = ctx.executor.allocator->EmplaceUntracked();
+            queryActive = ctx.executor.allocator->EmplaceUntracked();
+            std::memset(queries.data(), 0, queries.size_bytes());
+
+            recordOnNextEnd = true;
+
+            // Reset the query pool up to the final used query count (read through the pointer when the command runs) before the current RP begins
+            return [this, usedQueryCountPtr = this->usedQueryCount](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) {
+                commandBuffer.resetQueryPool(*pool, 0, *usedQueryCountPtr);
+            };
+        }
+
+        return {};
+    }
+
+    // TODO: call cmdbuf begin
+    void Queries::Counter::Begin(InterconnectContext &ctx, bool atExecutionStart) {
+        auto prepareFunc{Prepare(ctx)};
+
+        *queryActive = true;
+        (*usedQueryCount)++;
+
+        // Begin the query at index (used query count - 1), i.e. the slot claimed by the increment above
+        auto func{[this, queryIndex = *this->usedQueryCount - 1](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &) {
+            commandBuffer.beginQuery(*pool, queryIndex, vk::QueryControlFlagBits::ePrecise);
+        }};
+
+        if (atExecutionStart) {
+            ctx.executor.InsertPreExecuteCommand(std::move(func));
+
+            if (prepareFunc)
+                ctx.executor.InsertPreExecuteCommand(std::move(prepareFunc));
+        } else {
+            if (prepareFunc)
+                ctx.executor.InsertPreRpCommand(std::move(prepareFunc));
+
+            ctx.executor.AddCommand(std::move(func));
+        }
+    }
+
+    // TODO: must be called after begin in cmdbuf
+    void Queries::Counter::Report(InterconnectContext &ctx, BufferView view, std::optional timestamp) {
+        if (ctx.executor.executionTag != lastTag)
+            Begin(ctx, true);
+
+        // End the query at index (used query count - 1), i.e. the most recently begun query
+        ctx.executor.AddCommand([=, this, queryIndex = *this->usedQueryCount - 1](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &gpu) {
+            commandBuffer.endQuery(*pool, queryIndex);
+        });
+
+        *queryActive = false;
+
+        // Allocate memory for the timestamp in the megabuffer since updateBuffer can be expensive
+        BufferBinding timestampBuffer{timestamp ? ctx.gpu.megaBufferAllocator.Push(ctx.executor.cycle, span(*timestamp).cast()) : BufferBinding{}};
+        queries[*usedQueryCount - 1] = {view, timestampBuffer};
+
+        if (recordOnNextEnd) {
+            ctx.executor.InsertPostRpCommand([this, queriesPtr = this->queries, usedQueryCountPtr = this->usedQueryCount, queryActivePtr = this->queryActive](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &gpu) {
+                if (*queryActivePtr)
+                    commandBuffer.endQuery(*pool, *usedQueryCountPtr - 1);
+
+                for (u32 i{}; i < *usedQueryCountPtr; i++) {
+                    if (!queriesPtr[i].view)
+                        continue;
+
+                    auto dstBinding{queriesPtr[i].view.GetBinding(gpu)};
+                    auto timestampSrcBinding{queriesPtr[i].timestampBinding};
+
+                    commandBuffer.copyQueryPoolResults(*pool, i, 1, dstBinding.buffer, dstBinding.offset, 0, {});
+                    if (timestampSrcBinding)
+                        commandBuffer.copyBuffer(timestampSrcBinding.buffer, dstBinding.buffer, {vk::BufferCopy{
+                            .size = 8,
+                            .srcOffset = timestampSrcBinding.offset,
+                            .dstOffset = dstBinding.offset + 8
+                        }});
+                }
+            });
+            recordOnNextEnd = false;
+        }
+    }
+
+    // TODO: must be called after begin in cmdbuf
+    // TODO: call at exec end
+    void Queries::Counter::End(InterconnectContext &ctx) {
+        if (ctx.executor.executionTag != lastTag || !queryActive || !*queryActive)
+            return;
+
+        // End the query at index (used query count - 1), i.e. the most recently begun query
+        ctx.executor.AddCommand([=, this, queryIndex = *this->usedQueryCount - 1](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr &, GPU &gpu) {
+            commandBuffer.endQuery(*pool, queryIndex);
+        });
+
+        *queryActive = false;
+    }
+
+    Queries::Queries(GPU &gpu) : counters{{{gpu.vkDevice, vk::QueryType::eOcclusion}}} {}
+
+    void Queries::Query(InterconnectContext &ctx, soc::gm20b::IOVA address, CounterType type, std::optional timestamp) {
+        view.Update(ctx, address, timestamp ? 16 : 4);
+        usedQueryAddresses.emplace(u64{address});
+        ctx.executor.AttachBuffer(*view);
+
+        auto &counter{counters[static_cast(type)]};
+
+        view->GetBuffer()->MarkGpuDirty(ctx.executor.usageTracker);
+        counter.Report(ctx, *view, timestamp);
+        counter.Begin(ctx);
+    }
+
+    void Queries::ResetCounter(InterconnectContext &ctx, CounterType type) {
+        auto &counter{counters[static_cast(type)]};
+        counter.End(ctx);
+        counter.Begin(ctx);
+    }
+
+    void Queries::PurgeCaches(InterconnectContext &ctx) {
+        view.PurgeCaches();
+        for (u32 i{}; i < static_cast(CounterType::MaxValue); i++)
+            counters[i].End(ctx);
+    }
+
+    bool Queries::QueryPresentAtAddress(soc::gm20b::IOVA address) {
+        return usedQueryAddresses.contains(u64{address});
+    }
+}
diff --git a/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.h b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.h
new file mode 100644
index 00000000..c2e6eeec
--- /dev/null
+++ b/app/src/main/cpp/skyline/gpu/interconnect/maxwell_3d/queries.h
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MPL-2.0
+// Copyright © 2023 Skyline Team and Contributors (https://github.com/skyline-emu/)
+
+#pragma once
+
+#include
+#include
+#include
+#include "common.h"
+#include "gpu/buffer.h"
+#include "gpu/interconnect/common/common.h"
+
+namespace skyline::gpu::interconnect::maxwell3d {
+    /**
+     * @brief Handles using host Vulkan queries
+     */
+    class Queries {
+      public:
+        enum class CounterType : u32 {
+            Occulusion = 0,
+            MaxValue
+        };
+
+      private:
+        /**
+         * @brief Represents a single query counter type
+         */
+        class Counter {
+          private:
+            static constexpr size_t QueryPoolSize{0x1000}; //!< Size of the underlying VK query pool to use
+
+            /**
+             * @brief Information required to report a single query with an optional timestamp
+             */
+            struct Query {
+                BufferView view; //!< View to write the query result to
+                BufferBinding timestampBinding; //!< Binding to buffer containing timestamp to write out (optional)
+            };
+
+            vk::raii::QueryPool pool;
+
+            ContextTag lastTag{}; //!< Execution tag at the last time a query was begun
+            u32 lastRenderPassIndex{}; //!< Renderpass index at the last time a query was begun
+            bool recordOnNextEnd{}; //!< Whether to record the query copying code upon ending the next query
+
+            // A note on the below variables: In Vulkan you can begin/end queries in an RP but you can't copy the results. Since some games perform hundreds of queries in a row it's not ideal to have to constantly end the RP. To work around this, queries are performed on a per-RP basis, with a reset of queries 0->queryCount before the RP begins, and all the copies after the RP ends. Since per-RP storage is needed for this, the below variables are linearly allocated and replaced upon new queries happening in a new RP.
+            span queries{}; //!< A list of query reports to perform at the end of the current RP, linearly allocated
+            u32 *usedQueryCount{}; //!< Number of queries used from the pool in the current RP, linearly allocated
+            bool *queryActive{}; //!< If a query is active in the current RP, this is used so that the RP end code knows whether it needs to end the final query
+
+            std::function &, GPU &)> Prepare(InterconnectContext &ctx);
+
+          public:
+            Counter(vk::raii::Device &device, vk::QueryType type);
+
+            /**
+             * @brief Begins a query in the command stream
+             * @param atExecutionStart Whether to insert the query begin at the start of the current executor or at the current position
+             */
+            void Begin(InterconnectContext &ctx, bool atExecutionStart = false);
+
+            /**
+             * @brief Records a query end, and a copy into the target buffer in the command stream
+             * @param view View to copy the query result into
+             * @param timestamp Optional timestamp to report along with the query
+             */
+            void Report(InterconnectContext &ctx, BufferView view, std::optional timestamp);
+
+            /**
+             * @brief Records a query end if a query is currently active in this execution, otherwise does nothing
+             */
+            void End(InterconnectContext &ctx);
+
+        };
+
+        std::array(CounterType::MaxValue)> counters;
+
+        CachedMappedBufferView view; //!< Cached view for looking up query buffers from IOVAs
+
+        std::unordered_set usedQueryAddresses;
+
+      public:
+        Queries(GPU &gpu);
+
+        /**
+         * @brief Records a query of the counter corresponding to `type` and writes the result to the supplied address
+         */
+        void Query(InterconnectContext &ctx, soc::gm20b::IOVA address, CounterType type, std::optional timestamp);
+
+        /**
+         * @brief Ends any active query for the counter corresponding to `type` and begins a new one
+         */
+        void ResetCounter(InterconnectContext &ctx, CounterType type);
+
+        void PurgeCaches(InterconnectContext &ctx);
+
+        /**
+         * @return Whether a query has ever been reported to `address`
+         */
+        bool QueryPresentAtAddress(soc::gm20b::IOVA address);
+    };
+}
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/initialization.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/initialization.cpp
index a5e80efa..9fae1b6b 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/initialization.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/initialization.cpp
@@ -276,6 +276,7 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
         HandleMethod(0x86, 0x10);
         HandleMethod(0x4B7, 0x10);
         HandleMethod(0x365, 0x1);
+        HandleMethod(0x556, 0x1);
         HandleMethod(0x559, 0xFFF);
         HandleMethod(0x55F, 0xFFFFF);
         HandleMethod(0x584, 0x12);
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h
index e75d4669..89aa4072 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell/types.h
@@ -753,6 +753,33 @@ namespace skyline::soc::gm20b::engine::maxwell3d::type {
     };
     static_assert(sizeof(ClearSurface) == sizeof(u32));
 
+    struct ClearReportValue {
+        enum class Type : u32 {
+            ZPassPixelCount = 0x01,
+            ZCullStats = 0x02,
+            StreamingPrimitvesNeededMinusSucceeded = 0x03,
+            AlphaBetaClocks = 0x04,
+            StreamingPrimitivesSucceeded = 0x10,
+            StreamingPrimitivesNeeded = 0x11,
+            VerticesGenerated = 0x12,
+            PrimitivesGenerated = 0x13,
+            VertexShaderInvocations = 0x15,
+            TessellationInitInvocations = 0x16,
+            TessellationShaderInvocations = 0x17,
+            TessellationShaderPrimitivesGenerated = 0x18,
+            GeometryShaderInvocations = 0x1A,
+            GeometryShaderPrimitivesGenerated = 0x1B,
+            ClipperInvocations = 0x1C,
+            ClipperPrimitivesGenerated = 0x1D,
+            PixelShaderInvocations = 0x1E,
+            VtgPrimitivesOut = 0x1F
+        };
+
+        Type type : 5;
+        u32 _pad_ : 27;
+    };
+    static_assert(sizeof(ClearReportValue) == sizeof(u32));
+
     struct SemaphoreInfo {
         enum class Op : u8 {
             Release = 0,
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
index 4d0a43de..6d645cba 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.cpp
@@ -216,6 +216,10 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
                 i2m.LoadInlineData(*registers.i2m, loadInlineData);
             })
 
+            ENGINE_CASE(clearReportValue, {
+                interconnect.ResetCounter(clearReportValue.type);
+            })
+
             ENGINE_CASE(syncpointAction, {
                 Logger::Debug("Increment syncpoint: {}", static_cast(syncpointAction.id));
                 channelCtx.executor.Submit([=, syncpoints = &this->syncpoints, index = syncpointAction.id]() {
@@ -360,11 +364,15 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
             case type::SemaphoreInfo::Op::Counter: {
                 switch (info.counterType) {
                     case type::SemaphoreInfo::CounterType::Zero:
-                        WriteSemaphoreResult(*registers.semaphore, registers.semaphore->payload);
+                        channelCtx.executor.Submit([=, this, semaphore = *registers.semaphore]() {
+                            WriteSemaphoreResult(semaphore, semaphore.payload);
+                        });
                         break;
                     case type::SemaphoreInfo::CounterType::SamplesPassed:
                         // Return a fake result for now
-                        WriteSemaphoreResult(*registers.semaphore, 0xffffff);
+                        interconnect.Query({registers.semaphore->address}, info.counterType,
+                                           registers.semaphore->info.structureSize == type::SemaphoreInfo::StructureSize::FourWords ?
+                                           GetGpuTimeTicks() : std::optional{});
                         break;
 
                     default:
diff --git a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
index 68aa45b1..e094d0ee 100644
--- a/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
+++ b/app/src/main/cpp/skyline/soc/gm20b/engines/maxwell_3d.h
@@ -260,6 +260,9 @@ namespace skyline::soc::gm20b::engine::maxwell3d {
             Register<0x547, u32> zCullStatCountersEnable;
             Register<0x548, u32> pointSpriteEnable;
             Register<0x54A, u32> shaderExceptions;
+
+            Register<0x54C, type::ClearReportValue> clearReportValue;
+
             Register<0x54D, u32> multisampleEnable;
             Register<0x54E, type::ZtSelect> ztSelect;
 