Fix BufferManager::FindOrCreate Recreation Bugs

It was determined that `FindOrCreate` has several issues which this commit fixes: * It wouldn't correctly handle locking of the newly created `Buffer` as the constructor would setup traps prior to being able to lock it which could lead to UB * It wouldn't propagate the `usedByContext`/`everHadInlineUpdate` flags correctly * It wouldn't correctly set the `dirtyState` of the buffer according to that of its source buffers
2025-07-21 13:37:23 +00:00 · 2022-07-17 18:18:33 +05:30
parent d1a682eace
commit 38d3ff4300
3 changed files with 93 additions and 83 deletions
--- a/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer_manager.cpp
@ -34,17 +34,31 @@ namespace skyline::gpu {
        vk::DeviceSize offset{static_cast<size_t>(guestMapping.begin().base() - alignedStart)}, size{guestMapping.size()};
        guestMapping = span<u8>{alignedStart, alignedEnd};

+        struct LockedBuffer {
+            std::shared_ptr<Buffer> buffer;
+            ContextLock<Buffer> lock;
+
+            LockedBuffer(std::shared_ptr<Buffer> pBuffer, ContextTag tag) : buffer{std::move(pBuffer)}, lock{tag, *buffer} {}
+
+            Buffer *operator->() const {
+                return buffer.get();
+            }
+
+            std::shared_ptr<Buffer> &operator*() {
+                return buffer;
+            }
+        };
+
        // Lookup for any buffers overlapping with the supplied guest mapping
-        boost::container::small_vector<std::shared_ptr<Buffer>, 4> overlaps;
+        boost::container::small_vector<LockedBuffer, 4> overlaps;
        for (auto entryIt{std::lower_bound(buffers.begin(), buffers.end(), guestMapping.end().base(), BufferLessThan)}; entryIt != buffers.begin() && (*--entryIt)->guest->begin() <= guestMapping.end();)
            if ((*entryIt)->guest->end() > guestMapping.begin())
-                overlaps.push_back(*entryIt);
+                overlaps.emplace_back(*entryIt, tag);

        if (overlaps.size() == 1) [[likely]] {
-            auto buffer{overlaps.front()};
+            auto &buffer{overlaps.front()};
            if (buffer->guest->begin() <= guestMapping.begin() && buffer->guest->end() >= guestMapping.end()) {
                // If we find a buffer which can entirely fit the guest mapping, we can just return a view into it
-                ContextLock bufferLock{tag, *buffer};
                return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
            }
        }
@ -59,20 +73,56 @@ namespace skyline::gpu {
                highestAddress = mapping.end().base();
        }

-        auto newBuffer{std::make_shared<Buffer>(gpu, span<u8>{lowestAddress, highestAddress}, tag, overlaps)};
-        ContextLock newLock{tag, *newBuffer};
-        bool hasAlreadyAttached{}; //!< If any overlapping view was already attached to the current context
-        for (auto &overlap : overlaps) {
-            ContextLock overlapLock{tag, *overlap};
+        LockedBuffer newBuffer{std::make_shared<Buffer>(gpu, span<u8>{lowestAddress, highestAddress}), tag}; // If we don't lock the buffer prior to trapping it during synchronization, a race could occur with a guest trap acquiring the lock before we do and mutating the buffer prior to it being ready

-            if (!overlapLock.IsFirstUsage())
-                hasAlreadyAttached = true;
+        newBuffer->SynchronizeHost(false); // Overlaps don't necessarily fully cover the buffer so we have to perform a sync here to prevent any gaps

-            buffers.erase(std::find(buffers.begin(), buffers.end(), overlap));
+        auto copyBuffer{[](auto dstGuest, auto srcGuest, auto dstPtr, auto srcPtr) {
+            if (dstGuest.begin().base() <= srcGuest.begin().base()) {
+                size_t dstOffset{static_cast<size_t>(srcGuest.begin().base() - dstGuest.begin().base())};
+                size_t copySize{std::min(dstGuest.size() - dstOffset, srcGuest.size())};
+                std::memcpy(dstPtr + dstOffset, srcPtr, copySize);
+            } else if (dstGuest.begin().base() > srcGuest.begin().base()) {
+                size_t srcOffset{static_cast<size_t>(dstGuest.begin().base() - srcGuest.begin().base())};
+                size_t copySize{std::min(dstGuest.size(), srcGuest.size() - srcOffset)};
+                std::memcpy(dstPtr, srcPtr + srcOffset, copySize);
+            }
+        }}; //!< Copies between two buffers based off of their mappings in guest memory
+
+        bool shouldAttach{}; //!< If the new buffer should be attached to the current context
+        for (auto &srcBuffer : overlaps) {
+            if (!srcBuffer.lock.IsFirstUsage())
+                // If any overlapping buffer was already attached to the current context, we should also attach the current context
+                shouldAttach = true;
+
+            // All newly created buffers that have this set are guaranteed to be attached in buffer FindOrCreate, attach will then lock the buffer without resetting this flag, which will only finally be reset when the lock is released
+            newBuffer->usedByContext |= srcBuffer->usedByContext;
+            newBuffer->everHadInlineUpdate |= srcBuffer->everHadInlineUpdate;
+
+            if (srcBuffer->cycle && newBuffer->cycle != srcBuffer->cycle)
+                if (newBuffer->cycle)
+                    newBuffer->cycle->ChainCycle(srcBuffer->cycle);
+                else
+                    newBuffer->cycle = srcBuffer->cycle;
+
+            if (srcBuffer->dirtyState == Buffer::DirtyState::GpuDirty) {
+                srcBuffer->WaitOnFence();
+
+                if (srcBuffer.lock.IsFirstUsage() && newBuffer->dirtyState != Buffer::DirtyState::GpuDirty)
+                    copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->mirror.data(), srcBuffer->backing.data());
+                else
+                    newBuffer->MarkGpuDirty();
+            } else if (srcBuffer->usedByContext) {
+                if (srcBuffer->dirtyState == Buffer::DirtyState::CpuDirty)
+                    Logger::Error("Buffer (0x{}-0x{}) is marked as CPU dirty while being utilized by the context, this is not valid", srcBuffer->guest->begin().base(), srcBuffer->guest->end().base());
+                // We need the backing to be stable so that any accesses within this context are sequenced correctly, we can't use the source mirror here either since buffer writes within this context will update the mirror on CPU and backing on GPU
+                srcBuffer->WaitOnFence();
+                copyBuffer(*newBuffer->guest, *srcBuffer->guest, newBuffer->backing.data(), srcBuffer->backing.data());
+            }

            // Transfer all views from the overlapping buffer to the new buffer with the new buffer and updated offset, ensuring pointer stability
-            vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(overlap->guest->begin() - newBuffer->guest->begin())};
-            for (auto it{overlap->views.begin()}; it != overlap->views.end(); it++) {
+            vk::DeviceSize overlapOffset{static_cast<vk::DeviceSize>(srcBuffer->guest->begin() - newBuffer->guest->begin())};
+            for (auto it{srcBuffer->views.begin()}; it != srcBuffer->views.end(); it++) {
                if (overlapOffset)
                    // This is a slight hack as we really shouldn't be changing the underlying non-mutable set elements without a rehash but without writing our own set impl this is the best we can do
                    const_cast<Buffer::BufferViewStorage *>(&*it)->offset += overlapOffset;
@ -84,26 +134,28 @@ namespace skyline::gpu {

            if (overlapOffset)
                // All current hashes are invalidated by above loop if overlapOffset is nonzero so rehash the container
-                overlap->views.rehash(0);
+                srcBuffer->views.rehash(0);

            // Merge the view sets, this will keep pointer stability hence avoiding any reallocation
-            newBuffer->views.merge(overlap->views);
+            newBuffer->views.merge(srcBuffer->views);

            // Transfer all delegates references from the overlapping buffer to the new buffer
-            for (auto &delegate : overlap->delegates) {
-                delegate->buffer = newBuffer;
+            for (auto &delegate : srcBuffer->delegates) {
+                delegate->buffer = *newBuffer;
                if (delegate->usageCallback)
-                    delegate->usageCallback(*delegate->view, newBuffer);
+                    delegate->usageCallback(*delegate->view, *newBuffer);
            }

-            newBuffer->delegates.splice(newBuffer->delegates.end(), overlap->delegates);
+            newBuffer->delegates.splice(newBuffer->delegates.end(), srcBuffer->delegates);
+
+            srcBuffer->Invalidate(); // Invalidate the overlapping buffer so it can't be synced in the future
+            buffers.erase(std::find(buffers.begin(), buffers.end(), srcBuffer.buffer));
        }

-        if (hasAlreadyAttached)
-            // We need to reattach the buffer if any overlapping views were already attached to the current context
-            attachBuffer(newBuffer, std::move(newLock));
+        if (shouldAttach)
+            attachBuffer(*newBuffer, std::move(newBuffer.lock));

-        buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest->end().base(), BufferLessThan), newBuffer);
+        buffers.insert(std::lower_bound(buffers.begin(), buffers.end(), newBuffer->guest->end().base(), BufferLessThan), *newBuffer);

        return newBuffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - newBuffer->guest->begin()) + offset, size);
    }