Use semaphores for presentation engine frame synchronisation

Avoids waits on the CPU which can be costly and confuse the scheduler, also reduces latency significantly.
2025-07-17 08:46:39 +00:00 · 2022-10-16 20:47:17 +01:00
parent 0670e0e0dc
commit 1a0819fb76
4 changed files with 106 additions and 87 deletions
--- a/app/src/main/cpp/skyline/gpu/presentation_engine.cpp
+++ b/app/src/main/cpp/skyline/gpu/presentation_engine.cpp
@ -24,7 +24,8 @@ namespace skyline::gpu {
    PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu)
        : state{state},
          gpu{gpu},
-          acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}},
+          presentSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
+          acquireSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
          presentationTrack{static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()},
          vsyncEvent{std::make_shared<kernel::type::KEvent>(state, true)},
          choreographerThread{&PresentationEngine::ChoreographerThread, this},
@ -116,35 +117,31 @@ namespace skyline::gpu {
            windowScalingMode = frame.scalingMode;
        }

-        if (frame.transform != windowTransform) {
-            if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
-                throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
-            windowTransform = frame.transform;
-        }
+        if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
+            throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
+        windowTransform = frame.transform;

-        gpu.vkDevice.resetFences(*acquireFence);
+        auto &acquireSemaphore{acquireSemaphores[acquireSemaphoreIndex]};
+        acquireSemaphoreIndex = (acquireSemaphoreIndex + 1) % swapchainImageCount;

        std::pair<vk::Result, u32> nextImage;
-        while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
+        while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), *acquireSemaphore, {}), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
            if (nextImage.first == vk::Result::eSuboptimalKHR)
                surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
            else
                throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
        }
-        auto &nextImageTexture{images.at(nextImage.second)};

-        std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max());
+        auto &nextImageTexture{images.at(nextImage.second)};
+        auto &presentSemaphore{presentSemaphores[nextImage.second]};

        texture->SynchronizeHost();
-        nextImageTexture->CopyFrom(texture, vk::ImageSubresourceRange{
+        nextImageTexture->CopyFrom(texture, *acquireSemaphore, *presentSemaphore, vk::ImageSubresourceRange{
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .levelCount = 1,
            .layerCount = 1,
        });

-        // Wait on the copy to the swapchain image to complete before submitting for presentation
-        nextImageTexture->WaitOnFence();
-
        auto getMonotonicNsNow{[]() -> i64 {
            timespec time;
            if (clock_gettime(CLOCK_MONOTONIC, &time))
@ -194,6 +191,8 @@ namespace skyline::gpu {
                .swapchainCount = 1,
                .pSwapchains = &**vkSwapchain,
                .pImageIndices = &nextImage.second,
+                .waitSemaphoreCount = 1,
+                .pWaitSemaphores = &*presentSemaphore,
            }); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally
        }

@ -328,6 +327,7 @@ namespace skyline::gpu {

        swapchainFormat = format;
        swapchainExtent = extent;
+        swapchainImageCount = vkImages.size();
    }

    void PresentationEngine::UpdateSurface(jobject newSurface) {