mirror of
https://github.com/Takiiiiiiii/strato.git
synced 2025-07-17 08:46:39 +00:00
Introduce ThreadLocal Class + Fix Several GPU Bugs
* Fix `AddClearColorSubpass` bug where it would not generate a `vkCmdNextSubpass` when an attachment clear was utilized
* Fix `AddSubpass` bug where the Depth Stencil texture would not be synced
* Respect `VkCommandPool` external synchronization requirements by making it thread-local with a custom RAII wrapper
* Fix linear RT width calculation as it's provided in terms of bytes rather than format units
* Fix `AllocateStagingBuffer` bug where it would not supply `eTransferDst` as a usage flag
* Fix `AllocateMappedImage` bug where `VkMemoryPropertyFlags` were not respected, resulting in non-`eHostVisible` memory being utilized
* Change feature requirement in `AndroidManifest.xml` to Vulkan 1.1 from OGL 3.1 as this was incorrect
This commit is contained in:
@ -20,21 +20,20 @@ namespace skyline::gpu {
|
||||
return false;
|
||||
}
|
||||
|
||||
CommandScheduler::CommandScheduler(GPU &pGpu) : gpu(pGpu), vkCommandPool(pGpu.vkDevice, vk::CommandPoolCreateInfo{
|
||||
CommandScheduler::CommandScheduler(GPU &pGpu) : gpu(pGpu), pool(std::ref(pGpu.vkDevice), vk::CommandPoolCreateInfo{
|
||||
.flags = vk::CommandPoolCreateFlagBits::eTransient | vk::CommandPoolCreateFlagBits::eResetCommandBuffer,
|
||||
.queueFamilyIndex = pGpu.vkQueueFamilyIndex,
|
||||
}) {}
|
||||
|
||||
CommandScheduler::ActiveCommandBuffer CommandScheduler::AllocateCommandBuffer() {
|
||||
std::scoped_lock lock(mutex);
|
||||
auto slot{std::find_if(commandBuffers.begin(), commandBuffers.end(), CommandBufferSlot::AllocateIfFree)};
|
||||
auto slotId{std::distance(commandBuffers.begin(), slot)};
|
||||
if (slot != commandBuffers.end())
|
||||
auto slot{std::find_if(pool->buffers.begin(), pool->buffers.end(), CommandBufferSlot::AllocateIfFree)};
|
||||
auto slotId{std::distance(pool->buffers.begin(), slot)};
|
||||
if (slot != pool->buffers.end())
|
||||
return ActiveCommandBuffer(*slot);
|
||||
|
||||
vk::CommandBuffer commandBuffer;
|
||||
vk::CommandBufferAllocateInfo commandBufferAllocateInfo{
|
||||
.commandPool = *vkCommandPool,
|
||||
.commandPool = *pool->vkCommandPool,
|
||||
.level = vk::CommandBufferLevel::ePrimary,
|
||||
.commandBufferCount = 1,
|
||||
};
|
||||
@ -42,7 +41,7 @@ namespace skyline::gpu {
|
||||
auto result{(*gpu.vkDevice).allocateCommandBuffers(&commandBufferAllocateInfo, &commandBuffer, *gpu.vkDevice.getDispatcher())};
|
||||
if (result != vk::Result::eSuccess)
|
||||
vk::throwResultException(result, __builtin_FUNCTION());
|
||||
return ActiveCommandBuffer(commandBuffers.emplace_back(gpu.vkDevice, commandBuffer, vkCommandPool));
|
||||
return ActiveCommandBuffer(pool->buffers.emplace_back(gpu.vkDevice, commandBuffer, pool->vkCommandPool));
|
||||
}
|
||||
|
||||
void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) {
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <common/thread_local.h>
|
||||
#include "fence_cycle.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
@ -62,9 +63,19 @@ namespace skyline::gpu {
|
||||
};
|
||||
|
||||
GPU &gpu;
|
||||
std::mutex mutex; //!< Synchronizes mutations to the command pool due to allocations
|
||||
vk::raii::CommandPool vkCommandPool;
|
||||
std::list<CommandBufferSlot> commandBuffers;
|
||||
|
||||
/**
|
||||
* @brief A command pool designed to be thread-local to respect external synchronization for all command buffers and the associated pool
|
||||
* @note If we utilized a single global pool there would need to be a mutex around command buffer recording which would incur significant costs
|
||||
*/
|
||||
struct CommandPool {
|
||||
vk::raii::CommandPool vkCommandPool; //!< The Vulkan command pool that every slot in `buffers` is allocated from
|
||||
std::list<CommandBufferSlot> buffers; //!< Command buffer slots belonging to this pool, AllocateCommandBuffer searches these for a free slot before allocating a new one
|
||||
|
||||
// Forwards all supplied arguments verbatim to the vk::raii::CommandPool constructor, `buffers` starts out empty
// NOTE(review): presumably invoked by ThreadLocal with the arguments given to `pool` in the CommandScheduler constructor — confirm against common/thread_local.h
template<typename... Args>
|
||||
constexpr CommandPool(Args &&... args) : vkCommandPool(std::forward<Args>(args)...) {}
|
||||
};
|
||||
ThreadLocal<CommandPool> pool;
|
||||
|
||||
/**
|
||||
* @brief Allocates an existing or new primary command buffer from the pool
|
||||
|
@ -20,16 +20,18 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
void CommandExecutor::AddSubpass(const std::function<void(vk::raii::CommandBuffer &, const std::shared_ptr<FenceCycle> &, GPU &)> &function, vk::Rect2D renderArea, std::vector<TextureView> inputAttachments, std::vector<TextureView> colorAttachments, std::optional<TextureView> depthStencilAttachment) {
|
||||
for (const auto& attachments : {inputAttachments, colorAttachments})
|
||||
for (const auto& attachment : attachments)
|
||||
for (const auto &attachments : {inputAttachments, colorAttachments})
|
||||
for (const auto &attachment : attachments)
|
||||
syncTextures.emplace(attachment.backing.get());
|
||||
if (depthStencilAttachment)
|
||||
syncTextures.emplace(depthStencilAttachment->backing.get());
|
||||
|
||||
bool newRenderpass{CreateRenderpass(renderArea)};
|
||||
renderpass->AddSubpass(inputAttachments, colorAttachments, depthStencilAttachment ? &*depthStencilAttachment : nullptr);
|
||||
if (newRenderpass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), function);
|
||||
else
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), function);
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
||||
}
|
||||
|
||||
void CommandExecutor::AddClearColorSubpass(TextureView attachment, const vk::ClearColorValue &value) {
|
||||
@ -38,7 +40,10 @@ namespace skyline::gpu::interconnect {
|
||||
})};
|
||||
renderpass->AddSubpass({}, attachment, nullptr);
|
||||
|
||||
if (!renderpass->ClearColorAttachment(0, value)) {
|
||||
if (renderpass->ClearColorAttachment(0, value)) {
|
||||
if (!newRenderpass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>());
|
||||
} else {
|
||||
auto function{[scissor = attachment.backing->dimensions, value](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
|
||||
commandBuffer.clearAttachments(vk::ClearAttachment{
|
||||
.aspectMask = vk::ImageAspectFlagBits::eColor,
|
||||
@ -54,7 +59,7 @@ namespace skyline::gpu::interconnect {
|
||||
if (newRenderpass)
|
||||
nodes.emplace_back(std::in_place_type_t<node::FunctionNode>(), function);
|
||||
else
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassNode>(), function);
|
||||
nodes.emplace_back(std::in_place_type_t<node::NextSubpassFunctionNode>(), function);
|
||||
}
|
||||
}
|
||||
|
||||
@ -73,12 +78,15 @@ namespace skyline::gpu::interconnect {
|
||||
|
||||
using namespace node;
|
||||
for (NodeVariant &node : nodes) {
|
||||
#define NODE(name) [&](name& node) { node(commandBuffer, cycle, gpu); }
|
||||
std::visit(VariantVisitor{
|
||||
[&](FunctionNode &node) { node(commandBuffer, cycle, gpu); },
|
||||
[&](RenderpassNode &node) { node(commandBuffer, cycle, gpu); },
|
||||
[&](NextSubpassNode &node) { node(commandBuffer, cycle, gpu); },
|
||||
[&](RenderpassEndNode &node) { node(commandBuffer, cycle, gpu); },
|
||||
NODE(FunctionNode),
|
||||
NODE(RenderpassNode),
|
||||
NODE(NextSubpassNode),
|
||||
NODE(NextSubpassFunctionNode),
|
||||
NODE(RenderpassEndNode),
|
||||
}, node);
|
||||
#undef NODE
|
||||
}
|
||||
|
||||
for (auto texture : syncTextures)
|
||||
|
@ -286,10 +286,19 @@ namespace skyline::gpu::interconnect::node {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A node which progresses to the next subpass during a renderpass
|
||||
*/
|
||||
struct NextSubpassNode {
|
||||
// Shares the (commandBuffer, cycle, gpu) call signature of the other nodes so it can be dispatched by the same visitor, `cycle` and `gpu` are not used here
void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {
|
||||
// Only advances the subpass, nothing else is recorded by this node; eInline means the next subpass' commands are recorded directly into this command buffer
commandBuffer.nextSubpass(vk::SubpassContents::eInline);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief A FunctionNode which progresses to the next subpass prior to calling the function
|
||||
*/
|
||||
struct NextSubpassNode : private FunctionNode {
|
||||
struct NextSubpassFunctionNode : private FunctionNode {
|
||||
using FunctionNode::FunctionNode;
|
||||
|
||||
void operator()(vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &gpu) {
|
||||
@ -307,5 +316,5 @@ namespace skyline::gpu::interconnect::node {
|
||||
}
|
||||
};
|
||||
|
||||
using NodeVariant = std::variant<FunctionNode, RenderpassNode, NextSubpassNode, RenderpassEndNode>; //!< A variant encompassing all command nodes types
|
||||
using NodeVariant = std::variant<FunctionNode, RenderpassNode, NextSubpassNode, NextSubpassFunctionNode, RenderpassEndNode>; //!< A variant encompassing all command nodes types
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ namespace skyline::gpu::interconnect {
|
||||
u32 gpuAddressHigh;
|
||||
};
|
||||
};
|
||||
u32 widthBytes; //!< The width in bytes for linear textures
|
||||
GuestTexture guest;
|
||||
std::optional<TextureView> view;
|
||||
|
||||
@ -74,6 +75,9 @@ namespace skyline::gpu::interconnect {
|
||||
|
||||
/**
 * @brief Sets the width of the render target at the supplied index and resets its view
 * @param index The index of the render target, looked up via `renderTargets.at(index)`
 * @param value The width as written by the guest, this is stored unmodified in `widthBytes` and is divided by the format's `bpb` before being used as the texture width when the target is linear and has a format
 */
void SetRenderTargetWidth(size_t index, u32 value) {
|
||||
auto &renderTarget{renderTargets.at(index)};
|
||||
// The raw value is retained so the width can be recomputed if the format or tile mode is set after the width
renderTarget.widthBytes = value;
|
||||
if (renderTarget.guest.tileConfig.mode == texture::TileMode::Linear && renderTarget.guest.format)
|
||||
value /= renderTarget.guest.format->bpb; // Width is in bytes rather than format units for linear textures
|
||||
renderTarget.guest.dimensions.width = value;
|
||||
// The existing view no longer matches the dimensions, so it is dropped
renderTarget.view.reset();
|
||||
}
|
||||
@ -134,6 +138,10 @@ namespace skyline::gpu::interconnect {
|
||||
throw exception("Cannot translate the supplied RT format: 0x{:X}", static_cast<u32>(format));
|
||||
}
|
||||
}();
|
||||
|
||||
if (renderTarget.guest.tileConfig.mode == texture::TileMode::Linear && renderTarget.guest.format)
|
||||
renderTarget.guest.dimensions.width = renderTarget.widthBytes / renderTarget.guest.format->bpb;
|
||||
|
||||
renderTarget.disabled = !renderTarget.guest.format;
|
||||
renderTarget.view.reset();
|
||||
}
|
||||
@ -142,8 +150,17 @@ namespace skyline::gpu::interconnect {
|
||||
auto &renderTarget{renderTargets.at(index)};
|
||||
auto &config{renderTarget.guest.tileConfig};
|
||||
if (mode.isLinear) {
|
||||
if (config.mode != texture::TileMode::Linear && renderTarget.guest.format) {
|
||||
// Width is provided in bytes rather than format units for linear textures
|
||||
renderTarget.widthBytes = renderTarget.guest.dimensions.width;
|
||||
renderTarget.guest.dimensions.width /= renderTarget.guest.format->bpb;
|
||||
}
|
||||
|
||||
config.mode = texture::TileMode::Linear;
|
||||
} else [[likely]] {
|
||||
if (config.mode == texture::TileMode::Linear && renderTarget.guest.format)
|
||||
renderTarget.guest.dimensions.width = renderTarget.widthBytes;
|
||||
|
||||
config = texture::TileConfig{
|
||||
.mode = texture::TileMode::Block,
|
||||
.blockHeight = static_cast<u8>(1U << mode.blockHeightLog2),
|
||||
|
@ -78,7 +78,7 @@ namespace skyline::gpu::memory {
|
||||
std::shared_ptr<StagingBuffer> MemoryManager::AllocateStagingBuffer(vk::DeviceSize size) {
|
||||
vk::BufferCreateInfo bufferCreateInfo{
|
||||
.size = size,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferSrc,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst,
|
||||
.sharingMode = vk::SharingMode::eExclusive,
|
||||
.queueFamilyIndexCount = 1,
|
||||
.pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
|
||||
@ -112,7 +112,7 @@ namespace skyline::gpu::memory {
|
||||
Image MemoryManager::AllocateMappedImage(const vk::ImageCreateInfo &createInfo) {
|
||||
VmaAllocationCreateInfo allocationCreateInfo{
|
||||
.usage = VMA_MEMORY_USAGE_UNKNOWN,
|
||||
.memoryTypeBits = static_cast<u32>(vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eDeviceLocal),
|
||||
.requiredFlags = static_cast<VkMemoryPropertyFlags>(vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent | vk::MemoryPropertyFlagBits::eDeviceLocal),
|
||||
};
|
||||
|
||||
VkImage image;
|
||||
|
@ -484,6 +484,10 @@ namespace skyline::gpu {
|
||||
cycle = lCycle;
|
||||
}
|
||||
|
||||
// Calls WaitOnFence() before the texture's members are destroyed
// NOTE(review): presumably this blocks until any GPU work tracked by `cycle` has finished using the texture — confirm against WaitOnFence
Texture::~Texture() {
|
||||
WaitOnFence();
|
||||
}
|
||||
|
||||
// Stores the supplied view parameters as-is, `backing` is moved into the member rather than copied; nothing else is done here as the constructor body is empty
TextureView::TextureView(std::shared_ptr<Texture> backing, vk::ImageViewType type, vk::ImageSubresourceRange range, texture::Format format, vk::ComponentMapping mapping) : backing(std::move(backing)), type(type), format(format), mapping(mapping), range(range) {}
|
||||
|
||||
vk::ImageView TextureView::GetView() {
|
||||
|
@ -371,6 +371,8 @@ namespace skyline::gpu {
|
||||
*/
|
||||
Texture(GPU &gpu, texture::Dimensions dimensions, texture::Format format, vk::ImageLayout initialLayout = vk::ImageLayout::eGeneral, vk::ImageUsageFlags usage = {}, vk::ImageTiling tiling = vk::ImageTiling::eOptimal, u32 mipLevels = 1, u32 layerCount = 1, vk::SampleCountFlagBits sampleCount = vk::SampleCountFlagBits::e1);
|
||||
|
||||
~Texture();
|
||||
|
||||
/**
|
||||
* @note The handle returned is nullable and the appropriate precautions should be taken
|
||||
*/
|
||||
|
Reference in New Issue
Block a user