diff --git a/app/src/main/cpp/skyline/gpu/buffer.cpp b/app/src/main/cpp/skyline/gpu/buffer.cpp index 42862bbc..f53fc311 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.cpp +++ b/app/src/main/cpp/skyline/gpu/buffer.cpp @@ -345,7 +345,7 @@ namespace skyline::gpu { void Buffer::unlock() { tag = ContextTag{}; - backingImmutability = BackingImmutability::None; + AllowAllBackingWrites(); mutex.unlock(); } diff --git a/app/src/main/cpp/skyline/gpu/buffer.h b/app/src/main/cpp/skyline/gpu/buffer.h index f2d94461..342f6ffe 100644 --- a/app/src/main/cpp/skyline/gpu/buffer.h +++ b/app/src/main/cpp/skyline/gpu/buffer.h @@ -43,7 +43,7 @@ namespace skyline::gpu { class Buffer : public std::enable_shared_from_this { private: GPU &gpu; - SpinLock mutex; //!< Synchronizes any mutations to the buffer or its backing + RecursiveSpinLock mutex; //!< Synchronizes any mutations to the buffer or its backing std::atomic tag{}; //!< The tag associated with the last lock call memory::Buffer backing; std::optional guest; @@ -190,6 +190,11 @@ namespace skyline::gpu { backingImmutability = BackingImmutability::AllWrites; } + void AllowAllBackingWrites() { + std::scoped_lock lock{stateMutex}; + backingImmutability = BackingImmutability::None; + } + /** * @return If sequenced writes to the backing must not occur on the CPU * @note The buffer **must** be locked prior to calling this diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp index 4ce188bb..ee35fac5 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp @@ -1,6 +1,7 @@ // SPDX-License-Identifier: MPL-2.0 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/) +#include #include #include #include "command_executor.h" @@ -234,8 +235,12 @@ namespace skyline::gpu::interconnect { textureManagerLock.emplace(gpu.texture); bool didLock{view->LockWithTag(tag)}; - if (didLock) - attachedTextures.emplace_back(view->texture); + if (didLock) { + if (view->texture->FrequentlyLocked()) + attachedTextures.emplace_back(view->texture); + else + preserveAttachedTextures.emplace_back(view->texture); + } return didLock; } @@ -259,8 +264,12 @@ namespace skyline::gpu::interconnect { bufferManagerLock.emplace(gpu.buffer); bool didLock{view.LockWithTag(tag)}; - if (didLock) - attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); + if (didLock) { + if (view.GetBuffer()->FrequentlyLocked()) + attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); + else + preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); + } return didLock; } @@ -271,14 +280,20 @@ namespace skyline::gpu::interconnect { if (lock.OwnsLock()) { // Transfer ownership to executor so that the resource will stay locked for the period it is used on the GPU - attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); + if (view.GetBuffer()->FrequentlyLocked()) + attachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); + else + preserveAttachedBuffers.emplace_back(view.GetBuffer()->shared_from_this()); lock.Release(); // The executor will handle unlocking the lock so it doesn't need to be handled here } } void CommandExecutor::AttachLockedBuffer(std::shared_ptr buffer, ContextLock &&lock) { if (lock.OwnsLock()) { - attachedBuffers.emplace_back(std::move(buffer)); + if (buffer->FrequentlyLocked()) + attachedBuffers.emplace_back(std::move(buffer)); + else + preserveAttachedBuffers.emplace_back(std::move(buffer)); lock.Release(); // See AttachLockedBufferView(...) } } @@ -381,24 +396,25 @@ namespace skyline::gpu::interconnect { }, {}, {} ); - for (const auto &texture : attachedTextures) + boost::container::small_vector chainedCycles; + for (const auto &texture : ranges::views::concat(attachedTextures, preserveAttachedTextures)) { texture->SynchronizeHostInline(slot->commandBuffer, cycle, true); + // We don't need to attach the Texture to the cycle as a TextureView will already be attached + if (ranges::find(chainedCycles, texture->cycle.get()) == chainedCycles.end()) { + cycle->ChainCycle(texture->cycle); + chainedCycles.emplace_back(texture->cycle.get()); + } + + texture->cycle = cycle; + } } - for (const auto &attachedBuffer : attachedBuffers) - if (attachedBuffer->SequencedCpuBackingWritesBlocked()) + for (const auto &attachedBuffer : ranges::views::concat(attachedBuffers, preserveAttachedBuffers)) { + if (attachedBuffer->RequiresCycleAttach()) { attachedBuffer->SynchronizeHost(); // Synchronize attached buffers from the CPU without using a staging buffer - - for (const auto &attachedTexture : attachedTextures) { - // We don't need to attach the Texture to the cycle as a TextureView will already be attached - cycle->ChainCycle(attachedTexture->cycle); - attachedTexture->cycle = cycle; - } - - for (const auto &attachedBuffer : attachedBuffers) { - if (attachedBuffer->RequiresCycleAttach() ) { cycle->AttachObject(attachedBuffer.buffer); attachedBuffer->UpdateCycle(cycle); + attachedBuffer->AllowAllBackingWrites(); } } @@ -412,6 +428,12 @@ namespace skyline::gpu::interconnect { bufferManagerLock.reset(); megaBufferAllocatorLock.reset(); allocator->Reset(); + + // Periodically clear preserve attachments just in case there are new waiters which would otherwise end up waiting forever + if ((submissionNumber % CommandRecordThread::ActiveRecordSlots * 2) == 0) { + preserveAttachedBuffers.clear(); + preserveAttachedTextures.clear(); + } } void CommandExecutor::Submit() { @@ -423,7 +445,33 @@ namespace skyline::gpu::interconnect { if (!slot->nodes.empty()) { TRACE_EVENT("gpu", "CommandExecutor::Submit"); SubmitInternal(); + submissionNumber++; } + ResetInternal(); } + + void CommandExecutor::LockPreserve() { + if (!preserveLocked) { + preserveLocked = true; + + for (auto &buffer : preserveAttachedBuffers) + buffer->LockWithTag(tag); + + for (auto &texture : preserveAttachedTextures) + texture->LockWithTag(tag); + } + } + + void CommandExecutor::UnlockPreserve() { + if (preserveLocked) { + for (auto &buffer : preserveAttachedBuffers) + buffer->unlock(); + + for (auto &texture : preserveAttachedTextures) + texture->unlock(); + + preserveLocked = false; + } + } } diff --git a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h index 9f7a9d5e..874426fb 100644 --- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h +++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h @@ -15,6 +15,8 @@ namespace skyline::gpu::interconnect { */ class CommandRecordThread { public: + static constexpr size_t ActiveRecordSlots{6}; //!< Maximum number of simultaneously active slots + /** * @brief Single execution slot, buffered back and forth between the GPFIFO thread and the record thread */ @@ -40,7 +42,6 @@ namespace skyline::gpu::interconnect { const DeviceState &state; std::thread thread; - static constexpr size_t ActiveRecordSlots{6}; //!< Maximum number of simultaneously active slots CircularQueue incoming{ActiveRecordSlots}; //!< Slots pending recording CircularQueue outgoing{ActiveRecordSlots}; //!< Slots that have been submitted, may still be active on the GPU @@ -76,6 +77,7 @@ namespace skyline::gpu::interconnect { std::optional> textureManagerLock; //!< The lock on the texture manager, this is locked for the duration of the command execution from the first usage inside an execution to the submission std::optional> bufferManagerLock; //!< The lock on the buffer manager, see above for details std::optional> megaBufferAllocatorLock; //!< The lock on the megabuffer allocator, see above for details + bool preserveLocked{}; /** * @brief A wrapper of a Texture object that has been locked beforehand and must be unlocked afterwards @@ -94,6 +96,7 @@ namespace skyline::gpu::interconnect { ~LockedTexture(); }; + std::vector preserveAttachedTextures; std::vector attachedTextures; //!< All textures that are attached to the current execution /** @@ -113,6 +116,7 @@ namespace skyline::gpu::interconnect { ~LockedBuffer(); }; + std::vector preserveAttachedBuffers; std::vector attachedBuffers; //!< All textures that are attached to the current execution @@ -154,6 +158,7 @@ namespace skyline::gpu::interconnect { std::shared_ptr cycle; //!< The fence cycle that this command executor uses to wait for the GPU to finish executing commands LinearAllocatorState<> *allocator; ContextTag tag; //!< The tag associated with this command executor, any tagged resource locking must utilize this tag + size_t submissionNumber{}; u32 executionNumber{}; CommandExecutor(const DeviceState &state); @@ -258,5 +263,17 @@ namespace skyline::gpu::interconnect { * @brief Execute all the nodes and submit the resulting command buffer to the GPU */ void Submit(); + + /** + * @brief Locks all preserve attached buffers/textures + * @note This **MUST** be called before attaching any buffers/textures to an execution + */ + void LockPreserve(); + + /** + * @brief Unlocks all preserve attached buffers/textures + * @note This **MUST** be called when there is no GPU work left to be done to avoid deadlocks where the guest will try to lock a buffer/texture but the GPFIFO thread has no work so won't periodically unlock it + */ + void UnlockPreserve(); }; } diff --git a/app/src/main/cpp/skyline/gpu/texture/texture.h b/app/src/main/cpp/skyline/gpu/texture/texture.h index 000a2122..f1e8da30 100644 --- a/app/src/main/cpp/skyline/gpu/texture/texture.h +++ b/app/src/main/cpp/skyline/gpu/texture/texture.h @@ -3,6 +3,7 @@ #pragma once +#include #include #include #include @@ -356,9 +357,9 @@ namespace skyline::gpu { class Texture : public std::enable_shared_from_this { private: GPU &gpu; - std::mutex mutex; //!< Synchronizes any mutations to the texture or its backing + RecursiveSpinLock mutex; //!< Synchronizes any mutations to the texture or its backing std::atomic tag{}; //!< The tag associated with the last lock call - std::condition_variable backingCondition; //!< Signalled when a valid backing has been swapped in + std::condition_variable_any backingCondition; //!< Signalled when a valid backing has been swapped in using BackingType = std::variant; BackingType backing; //!< The Vulkan image that backs this texture, it is nullable