Use semaphores for presentation engine frame synchronisation

Avoids waits on the CPU which can be costly and confuse the scheduler, also reduces latency significantly.
This commit is contained in:
Billy Laws
2022-10-16 20:47:17 +01:00
parent 0670e0e0dc
commit 1a0819fb76
4 changed files with 106 additions and 87 deletions

View File

@ -24,7 +24,8 @@ namespace skyline::gpu {
PresentationEngine::PresentationEngine(const DeviceState &state, GPU &gpu)
: state{state},
gpu{gpu},
acquireFence{gpu.vkDevice, vk::FenceCreateInfo{}},
presentSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
acquireSemaphores{util::MakeFilledArray<vk::raii::Semaphore, MaxSwapchainImageCount>(gpu.vkDevice, vk::SemaphoreCreateInfo{})},
presentationTrack{static_cast<u64>(trace::TrackIds::Presentation), perfetto::ProcessTrack::Current()},
vsyncEvent{std::make_shared<kernel::type::KEvent>(state, true)},
choreographerThread{&PresentationEngine::ChoreographerThread, this},
@ -116,35 +117,31 @@ namespace skyline::gpu {
windowScalingMode = frame.scalingMode;
}
if (frame.transform != windowTransform) {
if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
windowTransform = frame.transform;
}
if ((result = window->perform(window, NATIVE_WINDOW_SET_BUFFERS_TRANSFORM, static_cast<i32>(frame.transform))))
throw exception("Setting the buffer transform to '{}' failed with {}", ToString(frame.transform), result);
windowTransform = frame.transform;
gpu.vkDevice.resetFences(*acquireFence);
auto &acquireSemaphore{acquireSemaphores[acquireSemaphoreIndex]};
acquireSemaphoreIndex = (acquireSemaphoreIndex + 1) % swapchainImageCount;
std::pair<vk::Result, u32> nextImage;
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), {}, *acquireFence), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
while (nextImage = vkSwapchain->acquireNextImage(std::numeric_limits<u64>::max(), *acquireSemaphore, {}), nextImage.first != vk::Result::eSuccess) [[unlikely]] {
if (nextImage.first == vk::Result::eSuboptimalKHR)
surfaceCondition.wait(lock, [this]() { return vkSurface.has_value(); });
else
throw exception("vkAcquireNextImageKHR returned an unhandled result '{}'", vk::to_string(nextImage.first));
}
auto &nextImageTexture{images.at(nextImage.second)};
std::ignore = gpu.vkDevice.waitForFences(*acquireFence, true, std::numeric_limits<u64>::max());
auto &nextImageTexture{images.at(nextImage.second)};
auto &presentSemaphore{presentSemaphores[nextImage.second]};
texture->SynchronizeHost();
nextImageTexture->CopyFrom(texture, vk::ImageSubresourceRange{
nextImageTexture->CopyFrom(texture, *acquireSemaphore, *presentSemaphore, vk::ImageSubresourceRange{
.aspectMask = vk::ImageAspectFlagBits::eColor,
.levelCount = 1,
.layerCount = 1,
});
// Wait on the copy to the swapchain image to complete before submitting for presentation
nextImageTexture->WaitOnFence();
auto getMonotonicNsNow{[]() -> i64 {
timespec time;
if (clock_gettime(CLOCK_MONOTONIC, &time))
@ -194,6 +191,8 @@ namespace skyline::gpu {
.swapchainCount = 1,
.pSwapchains = &**vkSwapchain,
.pImageIndices = &nextImage.second,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &*presentSemaphore,
}); // We don't care about suboptimal images as they are caused by not respecting the transform hint, we handle transformations externally
}
@ -328,6 +327,7 @@ namespace skyline::gpu {
swapchainFormat = format;
swapchainExtent = extent;
swapchainImageCount = vkImages.size();
}
void PresentationEngine::UpdateSurface(jobject newSurface) {