Split NCE Trap page-out functionality from TrapRegions

The `TrapRegions` function performed a page-out on any regions that were trapped as read-only. This wasn't optimal, as it tied trapping and paging out into a single operation, while Buffers/Textures need to protect first, then synchronize, and only then page out. The trap was being moved to after the synchronization to get around this limitation, but that can cause a race: certain writes done after the synchronization but prior to the trap would be lost. This commit fixes these issues by splitting paging out into `PageOutRegions`, which any API user can call after `TrapRegions`. Co-authored-by: Billy Laws <blaws05@gmail.com>
2025-07-17 08:46:39 +00:00 · 2022-08-04 23:46:44 +01:00
parent da464d84bc
commit ffad246d67
4 changed files with 40 additions and 21 deletions
--- a/app/src/main/cpp/skyline/gpu/buffer.cpp
+++ b/app/src/main/cpp/skyline/gpu/buffer.cpp
@ -100,14 +100,17 @@ namespace skyline::gpu {

        if (dirtyState == DirtyState::GpuDirty)
            return;
-        else if (dirtyState == DirtyState::CpuDirty)
+
+        gpu.state.nce->TrapRegions(*trapHandle, false); // This has to occur prior to any synchronization as it'll skip trapping
+
+        if (dirtyState == DirtyState::CpuDirty)
            SynchronizeHost(true); // Will transition the Buffer to Clean

        dirtyState = DirtyState::GpuDirty;
+        gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used
+
        BlockAllCpuBackingWrites();
        AdvanceSequence(); // The GPU will modify buffer contents so advance to the next sequence
-
-        gpu.state.nce->TrapRegions(*trapHandle, false);
    }

    void Buffer::WaitOnFence() {
--- a/app/src/main/cpp/skyline/gpu/texture/texture.cpp
+++ b/app/src/main/cpp/skyline/gpu/texture/texture.cpp
@ -691,14 +691,18 @@ namespace skyline::gpu {
                // If a texture is Clean then we can just transition it to being GPU dirty and retrap it
                dirtyState = DirtyState::GpuDirty;
                gpu.state.nce->TrapRegions(*trapHandle, false);
+                gpu.state.nce->PageOutRegions(*trapHandle);
                return;
            } else if (dirtyState != DirtyState::CpuDirty) {
                return; // If the texture has not been modified on the CPU, there is no need to synchronize it
            }

            dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
+            gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
        }

+        // From this point on Clean -> CPU dirty state transitions can occur, GPU dirty -> * transitions will always require the full lock to be held and thus won't occur
+
        auto stagingBuffer{SynchronizeHostImpl()};
        if (stagingBuffer) {
            auto lCycle{gpu.scheduler.Submit([&](vk::raii::CommandBuffer &commandBuffer) {
@ -709,7 +713,8 @@ namespace skyline::gpu {
            cycle = lCycle;
        }

-        gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
+        if (gpuDirty)
+            gpu.state.nce->PageOutRegions(*trapHandle); // All data can be paged out from the guest as the guest mirror won't be used
    }

    void Texture::SynchronizeHostInline(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &pCycle, bool gpuDirty) {
@ -723,12 +728,14 @@ namespace skyline::gpu {
            if (gpuDirty && dirtyState == DirtyState::Clean) {
                dirtyState = DirtyState::GpuDirty;
                gpu.state.nce->TrapRegions(*trapHandle, false);
+                gpu.state.nce->PageOutRegions(*trapHandle);
                return;
            } else if (dirtyState != DirtyState::CpuDirty) {
                return;
            }

            dirtyState = gpuDirty ? DirtyState::GpuDirty : DirtyState::Clean;
+            gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
        }

        auto stagingBuffer{SynchronizeHostImpl()};
@ -739,7 +746,8 @@ namespace skyline::gpu {
            cycle = pCycle;
        }

-        gpu.state.nce->TrapRegions(*trapHandle, !gpuDirty); // Trap any future CPU reads (optionally) + writes to this texture
+        if (gpuDirty)
+            gpu.state.nce->PageOutRegions(*trapHandle);
    }

    void Texture::SynchronizeGuest(bool cpuDirty, bool skipTrap) {