Mirror of https://github.com/Takiiiiiiii/strato.git (synced 2025-07-17 08:46:39 +00:00)
Support using Vulkan semaphores with fence cycles
In some cases, like presentation, it may be possible to avoid waiting on the CPU by using a semaphore to signal GPU completion. Due to the binary nature of Vulkan semaphores, this requires a fair amount of code, since we need to ensure a semaphore is always unsignalled before it is waited on and signalled again. This is achieved with a special kind of chained cycle that can be attached even after guest GPFIFO processing for a given cycle: the main cycle's semaphore can be waited on, and the cycle for that wait is then attached to the main cycle, where it will be waited on before signalling.
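To make that concrete, here is a minimal sketch of the chained-cycle idea. This is an illustration only, not skyline's actual FenceCycle API: WaitSemaphore, semaphoreWaited, and the member layout are hypothetical.

    #include <cstdint>
    #include <memory>
    #include <vector>
    #include <vulkan/vulkan.hpp>

    struct FenceCycle {
        vk::Device device;
        vk::Fence fence;            // Signalled by the GPU when the submission completes
        vk::Semaphore semaphore;    // Binary semaphore signalled by the same submission
        bool semaphoreSubmitWait{}; // The semaphore may still be signalled from a prior
                                    // use and must be unsignalled before resignalling
        bool semaphoreWaited{};     // A consumer's GPU-side wait will unsignal the semaphore
        std::vector<std::shared_ptr<FenceCycle>> chainedCycles; // Must complete before we do

        // Hand the semaphore to a consumer submission (e.g. presentation) and chain
        // that submission's cycle onto this one, even after guest GPFIFO processing
        // has finished, so this cycle only reports completion once the GPU-side
        // wait has also completed
        vk::Semaphore WaitSemaphore(std::shared_ptr<FenceCycle> waiterCycle) {
            semaphoreWaited = true;
            chainedCycles.push_back(std::move(waiterCycle));
            return semaphore;
        }

        // CPU-side wait: all chained cycles must complete before this cycle does
        void Wait() {
            for (const auto &chained : chainedCycles)
                chained->Wait();
            while (device.waitForFences(fence, true, UINT64_MAX) == vk::Result::eTimeout);
        }
    };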
@@ -32,10 +32,11 @@ namespace skyline::gpu {
     }
 
     CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool)
-        : device(device),
-          commandBuffer(device, static_cast<VkCommandBuffer>(commandBuffer), static_cast<VkCommandPool>(*pool)),
-          fence(device, vk::FenceCreateInfo{}),
-          cycle(std::make_shared<FenceCycle>(device, *fence)) {}
+        : device{device},
+          commandBuffer{device, static_cast<VkCommandBuffer>(commandBuffer), static_cast<VkCommandPool>(*pool)},
+          fence{device, vk::FenceCreateInfo{}},
+          semaphore{device, vk::SemaphoreCreateInfo{}},
+          cycle{std::make_shared<FenceCycle>(device, *fence, *semaphore)} {}
 
     CommandScheduler::CommandScheduler(const DeviceState &state, GPU &pGpu)
         : state{state},
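Each slot now owns a binary semaphore alongside its fence, both handed to the slot's FenceCycle. For reference, a sketch of creating just these two objects with vulkan-hpp RAII wrappers, mirroring the initializer list above; the command buffer and cycle members of the real CommandBufferSlot are omitted.

    #include <vulkan/vulkan_raii.hpp>

    // Per-slot synchronization objects only; a simplified stand-in, not the real slot
    struct SlotSyncObjects {
        vk::raii::Fence fence;         // CPU-visible completion signal, created unsignalled
        vk::raii::Semaphore semaphore; // Binary semaphore (the default type), for GPU-side waits

        explicit SlotSyncObjects(vk::raii::Device &device)
            : fence{device, vk::FenceCreateInfo{}},
              semaphore{device, vk::SemaphoreCreateInfo{}} {}
    };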
@@ -55,7 +56,7 @@ namespace skyline::gpu {
             if (!slot.active.test_and_set(std::memory_order_acq_rel)) {
                 if (slot.cycle->Poll()) {
                     slot.commandBuffer.reset();
-                    slot.cycle = std::make_shared<FenceCycle>(slot.device, *slot.fence);
+                    slot.cycle = std::make_shared<FenceCycle>(*slot.cycle);
                     return {slot};
                 } else {
                     slot.active.clear(std::memory_order_release);
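Slot reuse no longer rebuilds the cycle from the raw device and fence; it constructs the new cycle from the old one, so the semaphore and its signalled-state bookkeeping carry over. Extending the hypothetical sketch above, such a recycling constructor might look like this; skyline's actual implementation may differ.

    // Hypothetical recycling constructor for the FenceCycle sketch above
    explicit FenceCycle(const FenceCycle &signalledCycle)
        : device{signalledCycle.device},
          fence{signalledCycle.fence},
          semaphore{signalledCycle.semaphore},
          // If nothing waited on the old semaphore it is still signalled, so the
          // next submission must first unsignal it with a top-of-pipe wait
          semaphoreSubmitWait{!signalledCycle.semaphoreWaited} {
        device.resetFences(fence); // Return the recycled fence to the unsignalled state
    }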
@@ -76,12 +77,29 @@ namespace skyline::gpu {
         return {pool->buffers.emplace_back(gpu.vkDevice, commandBuffer, pool->vkCommandPool)};
     }
 
-    void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, std::shared_ptr<FenceCycle> cycle) {
+    void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, std::shared_ptr<FenceCycle> cycle, span<vk::Semaphore> waitSemaphores, span<vk::Semaphore> signalSemaphores) {
+        boost::container::small_vector<vk::Semaphore, 3> fullWaitSemaphores{waitSemaphores.begin(), waitSemaphores.end()};
+        boost::container::small_vector<vk::PipelineStageFlags, 3> fullWaitStages{waitSemaphores.size(), vk::PipelineStageFlagBits::eAllCommands};
+
+        if (cycle->semaphoreSubmitWait) {
+            fullWaitSemaphores.push_back(cycle->semaphore);
+            // We don't need a full barrier since this is only done to ensure the semaphore is unsignalled
+            fullWaitStages.push_back(vk::PipelineStageFlagBits::eTopOfPipe);
+        }
+
+        boost::container::small_vector<vk::Semaphore, 2> fullSignalSemaphores{signalSemaphores.begin(), signalSemaphores.end()};
+        fullSignalSemaphores.push_back(cycle->semaphore);
+
         {
-            std::scoped_lock lock(gpu.queueMutex);
+            std::scoped_lock lock{gpu.queueMutex};
             gpu.vkQueue.submit(vk::SubmitInfo{
                 .commandBufferCount = 1,
                 .pCommandBuffers = &*commandBuffer,
+                .waitSemaphoreCount = static_cast<u32>(fullWaitSemaphores.size()),
+                .pWaitSemaphores = fullWaitSemaphores.data(),
+                .pWaitDstStageMask = fullWaitStages.data(),
+                .signalSemaphoreCount = static_cast<u32>(fullSignalSemaphores.size()),
+                .pSignalSemaphores = fullSignalSemaphores.data(),
             }, cycle->fence);
         }
     }
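A hedged usage sketch of the extended SubmitCommandBuffer follows: a presentation-style caller waits GPU-side on a swapchain acquire semaphore rather than stalling the CPU. The free function, its parameters, and the single-element span construction are assumptions for illustration, not verified against skyline's headers.

    // Hypothetical caller of the new SubmitCommandBuffer overload; everything
    // outside the submit call itself is assumed context
    void SubmitWithAcquireWait(skyline::gpu::CommandScheduler &scheduler,
                               const vk::raii::CommandBuffer &commandBuffer,
                               std::shared_ptr<skyline::gpu::FenceCycle> cycle,
                               vk::Semaphore acquireSemaphore) {
        // Wait on the acquire semaphore on the GPU timeline instead of blocking the
        // CPU; no explicit signal semaphores are needed since the cycle's own
        // semaphore is always appended to the signal list by SubmitCommandBuffer
        scheduler.SubmitCommandBuffer(commandBuffer, cycle,
                                      skyline::span<vk::Semaphore>(acquireSemaphore),
                                      skyline::span<vk::Semaphore>{});
        // cycle->semaphore is signalled once this submission completes and can in
        // turn be passed to vkQueuePresentKHR as a wait semaphore
    }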