Introduce FenceCycle Waiter Thread

A substantial amount of time is spent destroying dependencies on any thread that waits on or polls `FenceCycle`s. This is not optimal as it blocks those threads from moving on to other tasks, even though destruction is a fundamentally asynchronous task that can be deferred.

This commit solves this by introducing a thread dedicated to waiting on every `FenceCycle`, then signalling it and destroying all of its dependencies, which entirely fixes the issue of destruction blocking more important threads.
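
As a rough illustration of the pattern (not the actual Skyline implementation: `CycleWaiter` and `DummyCycle` are hypothetical stand-ins for `CommandScheduler` and `FenceCycle`, and a plain mutex/condition-variable queue replaces the `cycleQueue.Push()`/`Process()` interface used in the diff below), a dedicated thread can drain a queue of submitted cycles and absorb the waits and destruction off the submitting threads:

    #include <condition_variable>
    #include <deque>
    #include <memory>
    #include <mutex>
    #include <thread>

    // Hypothetical stand-in for FenceCycle: waiting on it blocks until the fence
    // signals and then releases (destroys) everything the cycle keeps alive
    struct DummyCycle {
        void Wait(bool shouldDestroy) { /* wait on the fence, then free dependencies */ }
    };

    // Hypothetical stand-in for the scheduler's cycle queue + waiter thread
    class CycleWaiter {
      public:
        CycleWaiter() : thread{&CycleWaiter::Run, this} {}

        ~CycleWaiter() {
            {
                std::scoped_lock lock{mutex};
                stop = true;
            }
            condition.notify_one();
            thread.join();
        }

        // Submitting threads only pay for a lock and a push, then return immediately
        // rather than blocking on fence waits and dependency destruction themselves
        void Push(std::shared_ptr<DummyCycle> cycle) {
            {
                std::scoped_lock lock{mutex};
                queue.push_back(std::move(cycle));
            }
            condition.notify_one();
        }

      private:
        // The dedicated thread drains the queue in submission order, waiting on each
        // cycle and destroying its dependencies off the hot path
        void Run() {
            std::unique_lock lock{mutex};
            while (true) {
                condition.wait(lock, [this] { return stop || !queue.empty(); });
                while (!queue.empty()) {
                    auto cycle{std::move(queue.front())};
                    queue.pop_front();
                    lock.unlock();
                    cycle->Wait(true); // The potentially expensive part now happens here
                    lock.lock();
                }
                if (stop)
                    return;
            }
        }

        std::mutex mutex;
        std::condition_variable condition;
        std::deque<std::shared_ptr<DummyCycle>> queue;
        bool stop{};
        std::thread thread; // Declared last so the other members are constructed before the thread starts
    };
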
PixelyIon
2022-08-01 22:08:32 +05:30
parent 5f8619f791
commit e1a4325137
5 changed files with 118 additions and 42 deletions

@@ -2,19 +2,53 @@
 // Copyright © 2021 Skyline Team and Contributors (https://github.com/skyline-emu/)

 #include <gpu.h>
+#include <loader/loader.h>
 #include "command_scheduler.h"

 namespace skyline::gpu {
+    void CommandScheduler::WaiterThread() {
+        if (int result{pthread_setname_np(pthread_self(), "Sky-CycleWaiter")})
+            Logger::Warn("Failed to set the thread name: {}", strerror(result));
+
+        try {
+            signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
+
+            cycleQueue.Process([](const std::shared_ptr<FenceCycle> &cycle) {
+                cycle->Wait(true);
+            }, [] {});
+        } catch (const signal::SignalException &e) {
+            Logger::Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
+            if (state.process)
+                state.process->Kill(false);
+            else
+                std::rethrow_exception(std::current_exception());
+        } catch (const std::exception &e) {
+            Logger::Error(e.what());
+            if (state.process)
+                state.process->Kill(false);
+            else
+                std::rethrow_exception(std::current_exception());
+        }
+    }
+
     CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool)
         : device(device),
           commandBuffer(device, static_cast<VkCommandBuffer>(commandBuffer), static_cast<VkCommandPool>(*pool)),
           fence(device, vk::FenceCreateInfo{}),
           cycle(std::make_shared<FenceCycle>(device, *fence)) {}

-    CommandScheduler::CommandScheduler(GPU &pGpu) : gpu(pGpu), pool(std::ref(pGpu.vkDevice), vk::CommandPoolCreateInfo{
-        .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
-        .queueFamilyIndex = pGpu.vkQueueFamilyIndex,
-    }) {}
+    CommandScheduler::CommandScheduler(const DeviceState &state, GPU &pGpu)
+        : state{state},
+          gpu{pGpu},
+          waiterThread{&CommandScheduler::WaiterThread, this},
+          pool{std::ref(pGpu.vkDevice), vk::CommandPoolCreateInfo{
+              .flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
+              .queueFamilyIndex = pGpu.vkQueueFamilyIndex,
+          }} {}
+
+    CommandScheduler::~CommandScheduler() {
+        waiterThread.join();
+    }

     CommandScheduler::ActiveCommandBuffer CommandScheduler::AllocateCommandBuffer() {
         for (auto &slot : pool->buffers) {
@@ -42,11 +76,15 @@ namespace skyline::gpu {
         return {pool->buffers.emplace_back(gpu.vkDevice, commandBuffer, pool->vkCommandPool)};
     }

-    void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) {
-        std::scoped_lock lock(gpu.queueMutex);
-        gpu.vkQueue.submit(vk::SubmitInfo{
-            .commandBufferCount = 1,
-            .pCommandBuffers = &*commandBuffer,
-        }, fence);
+    void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle) {
+        {
+            std::scoped_lock lock(gpu.queueMutex);
+            gpu.vkQueue.submit(vk::SubmitInfo{
+                .commandBufferCount = 1,
+                .pCommandBuffers = &*commandBuffer,
+            }, cycle->fence);
+        }
+
+        cycleQueue.Push(cycle);
     }
 }
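
For context, here is a hypothetical caller-side sketch of the new submission path (this helper is not part of the diff, and the `GetFenceCycle()` accessor and `operator*`/`operator->` forwarding used on `ActiveCommandBuffer` are assumptions about its interface): the submitting thread attaches the `FenceCycle` at submit time and returns immediately, leaving the eventual wait and dependency destruction to the "Sky-CycleWaiter" thread.

    // Hypothetical caller: record some GPU work, submit it together with its
    // FenceCycle, and never wait on the fence itself
    std::shared_ptr<FenceCycle> RecordAndSubmit(CommandScheduler &scheduler) {
        auto commandBuffer{scheduler.AllocateCommandBuffer()};
        commandBuffer->begin(vk::CommandBufferBeginInfo{
            .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
        });
        // ... record commands into *commandBuffer here ...
        commandBuffer->end();
        scheduler.SubmitCommandBuffer(*commandBuffer, commandBuffer.GetFenceCycle());
        return commandBuffer.GetFenceCycle(); // The returned cycle can be shared with anything that depends on this submission
    }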