mirror of
https://github.com/Takiiiiiiii/strato.git
synced 2025-07-17 08:46:39 +00:00
Implement transform feedback with VK_EXT_transform_feedback
Tested to work in Xenoblade Chronicles DE; the code handles both Hades varying input and buffer setup.
This commit is contained in:
@ -2817,6 +2817,146 @@ namespace skyline::gpu::interconnect {
|
||||
.rasterizationSamples = vk::SampleCountFlagBits::e1,
|
||||
};
|
||||
|
||||
/* Transform Feedback */
|
||||
private:
|
||||
bool transformFeedbackEnabled{}; //!< If transform feedback is currently enabled, written by SetTransformFeedbackEnabled
|
||||
|
||||
struct TransformFeedbackBuffer {
|
||||
IOVA iova;
|
||||
u32 size;
|
||||
u32 offset;
|
||||
BufferView view;
|
||||
|
||||
u32 stride;
|
||||
u32 varyingCount;
|
||||
std::array<u8, maxwell3d::TransformFeedbackVaryingCount> varyings;
|
||||
};
|
||||
std::array<TransformFeedbackBuffer, maxwell3d::TransformFeedbackBufferCount> transformFeedbackBuffers{}; //!< State of every transform feedback buffer slot, indexed by the buffer index passed to the setters
|
||||
|
||||
bool transformFeedbackVaryingsDirty{}; //!< If the varying list needs to be rebuilt by FillTransformFeedbackVaryingState, value-initialized so the first read is well-defined
|
||||
|
||||
struct TransformFeedbackBufferResult {
|
||||
BufferView view;
|
||||
bool wasCached;
|
||||
};
|
||||
|
||||
/**
 * @brief Retrieves the view for the transform feedback buffer at the supplied index, creating and caching it on first use
 * @param idx Index of the transform feedback buffer slot
 * @return An empty view when the slot has no address/size or its range cannot be translated, otherwise the view alongside whether it came from the slot's cache
 */
TransformFeedbackBufferResult GetTransformFeedbackBuffer(size_t idx) {
    auto &buffer{transformFeedbackBuffers[idx]};

    if (!buffer.iova || !buffer.size)
        return {nullptr, false};
    else if (buffer.view)
        return {buffer.view, true};

    auto mappings{channelCtx.asCtx->gmmu.TranslateRange(buffer.offset + buffer.iova, buffer.size)};
    if (mappings.size() == 0) {
        // Calling front() on an empty container is undefined behavior, treat the slot as unbound instead
        Logger::Warn("Transform feedback buffer {} has no mappings", idx);
        return {nullptr, false};
    }

    if (mappings.size() != 1)
        Logger::Warn("Multiple buffer mappings ({}) are not supported", mappings.size());

    // Only the first mapping is used, the warning above covers ranges that are split across several
    auto mapping{mappings.front()};
    buffer.view = executor.AcquireBufferManager().FindOrCreate(span<u8>(mapping.data(), buffer.size), executor.tag, [this](std::shared_ptr<Buffer> attachedBuffer, ContextLock<Buffer> &&lock) {
        executor.AttachLockedBuffer(attachedBuffer, std::move(lock));
    });

    return {buffer.view, false};
}
|
||||
|
||||
void FillTransformFeedbackVaryingState() {
|
||||
if (!transformFeedbackVaryingsDirty)
|
||||
return;
|
||||
|
||||
runtimeInfo.xfb_varyings.clear();
|
||||
|
||||
if (!transformFeedbackEnabled)
|
||||
return;
|
||||
|
||||
// Will be indexed by a u8 so allocate just enough space
|
||||
runtimeInfo.xfb_varyings.resize(256);
|
||||
for (u32 i{}; i < maxwell3d::TransformFeedbackBufferCount; i++) {
|
||||
const auto &buffer{transformFeedbackBuffers[i]};
|
||||
|
||||
for (u32 k{}; k < buffer.varyingCount; k++) {
|
||||
// TODO: We could merge multiple component accesses from the same attribute into one varying
|
||||
u8 attributeIndex{buffer.varyings[k]};
|
||||
runtimeInfo.xfb_varyings[attributeIndex] = {
|
||||
.buffer = i,
|
||||
.offset = k * 4,
|
||||
.stride = buffer.stride,
|
||||
.components = 1,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
transformFeedbackVaryingsDirty = false;
|
||||
}
|
||||
|
||||
/**
 * @brief Flags the transform feedback varying list as stale and requests a recompile of the vertex and geometry stages
 */
void InvalidateTransformFeedbackVaryings() {
    // Both stages are flagged for recompilation whenever the varying layout changes
    pipelineStages[maxwell3d::PipelineStage::Geometry].needsRecompile = true;
    pipelineStages[maxwell3d::PipelineStage::Vertex].needsRecompile = true;

    transformFeedbackVaryingsDirty = true;
}
|
||||
|
||||
public:
|
||||
void SetTransformFeedbackEnabled(bool enable) {
|
||||
transformFeedbackEnabled = enable;
|
||||
|
||||
if (enable && !gpu.traits.supportsTransformFeedback)
|
||||
Logger::Warn("Transform feedback used without host GPU support!");
|
||||
|
||||
InvalidateTransformFeedbackVaryings();
|
||||
}
|
||||
|
||||
/**
 * @brief Handles the enable state of a transform feedback buffer slot
 * @param idx Index of the transform feedback buffer slot
 * @param enabled If the slot is enabled, nothing is done in that case
 */
void SetTransformFeedbackBufferEnabled(size_t idx, bool enabled) {
    if (enabled)
        return;

    // A zeroed address makes the buffer lookup treat the slot as unbound
    transformFeedbackBuffers[idx].iova = {};
}
|
||||
|
||||
/**
 * @brief Sets the upper half of a transform feedback buffer's address and drops its cached view
 * @param idx Index of the transform feedback buffer slot
 * @param high Upper 32 bits of the address
 */
void SetTransformFeedbackBufferIovaHigh(size_t idx, u32 high) {
    auto &slot{transformFeedbackBuffers[idx]};
    slot.view = {}; // The cached view no longer matches the address
    slot.iova.high = high;
}
|
||||
|
||||
/**
 * @brief Sets the lower half of a transform feedback buffer's address and drops its cached view
 * @param idx Index of the transform feedback buffer slot
 * @param low Lower 32 bits of the address
 */
void SetTransformFeedbackBufferIovaLow(size_t idx, u32 low) {
    auto &slot{transformFeedbackBuffers[idx]};
    slot.view = {}; // The cached view no longer matches the address
    slot.iova.low = low;
}
|
||||
|
||||
/**
 * @brief Sets the size of a transform feedback buffer and drops its cached view
 * @param idx Index of the transform feedback buffer slot
 * @param size New size of the buffer
 */
void SetTransformFeedbackBufferSize(size_t idx, u32 size) {
    auto &slot{transformFeedbackBuffers[idx]};
    slot.view = {}; // The cached view was created for the previous size
    slot.size = size;
}
|
||||
|
||||
/**
 * @brief Sets the offset of a transform feedback buffer and drops its cached view
 * @param idx Index of the transform feedback buffer slot
 * @param offset New offset that is added to the buffer's address
 */
void SetTransformFeedbackBufferOffset(size_t idx, u32 offset) {
    auto &slot{transformFeedbackBuffers[idx]};
    slot.view = {}; // The cached view was created for the previous offset
    slot.offset = offset;
}
|
||||
|
||||
/**
 * @brief Sets the stride of a transform feedback buffer and invalidates the varying state
 * @param idx Index of the transform feedback buffer slot
 * @param stride New stride of the buffer
 */
void SetTransformFeedbackBufferStride(size_t idx, u32 stride) {
    transformFeedbackBuffers[idx].stride = stride;

    // The stride is part of every varying entry, so the list has to be rebuilt
    InvalidateTransformFeedbackVaryings();
}
|
||||
|
||||
/**
 * @brief Sets how many varyings a transform feedback buffer captures and invalidates the varying state
 * @param idx Index of the transform feedback buffer slot
 * @param varyingCount New amount of varyings for the buffer
 */
void SetTransformFeedbackBufferVaryingCount(size_t idx, u32 varyingCount) {
    transformFeedbackBuffers[idx].varyingCount = varyingCount;

    // The varying list is built from this count, so it has to be rebuilt
    InvalidateTransformFeedbackVaryings();
}
|
||||
|
||||
/**
 * @brief Writes one 32-bit word of a transform feedback buffer's varying table and invalidates the varying state
 * @param bufIdx Index of the transform feedback buffer slot
 * @param varIdx Index of the 32-bit word inside the varying table
 * @param value The word to write, it covers four consecutive single-byte varying entries
 */
void SetTransformFeedbackBufferVarying(size_t bufIdx, size_t varIdx, u32 value) {
    // The byte table is viewed as 32-bit words so that a whole word can be stored at once
    auto varyingWords{span(transformFeedbackBuffers[bufIdx].varyings).cast<u32>()};
    varyingWords[varIdx] = value;

    InvalidateTransformFeedbackVaryings();
}
|
||||
|
||||
/* Draws */
|
||||
public:
|
||||
template<bool IsIndexed>
|
||||
@ -2900,6 +3040,34 @@ namespace skyline::gpu::interconnect {
|
||||
if (vertexAttribute.enabled)
|
||||
vertexAttributesDescriptions.push_back(vertexAttribute.description);
|
||||
|
||||
struct BoundTransformFeedbackBuffers {
|
||||
std::array<vk::Buffer, maxwell3d::TransformFeedbackBufferCount> handles{};
|
||||
std::array<vk::DeviceSize, maxwell3d::TransformFeedbackBufferCount> offsets{};
|
||||
std::array<vk::DeviceSize, maxwell3d::TransformFeedbackBufferCount> sizes{};
|
||||
};
|
||||
|
||||
std::shared_ptr<BoundTransformFeedbackBuffers> boundTransformFeedbackBuffers{};
|
||||
|
||||
if (transformFeedbackEnabled) {
|
||||
boundTransformFeedbackBuffers = std::allocate_shared<BoundTransformFeedbackBuffers, LinearAllocator<BoundVertexBuffers>>(executor.allocator);
|
||||
for (size_t i{}; i < maxwell3d::TransformFeedbackBufferCount; i++) {
|
||||
if (auto result{GetTransformFeedbackBuffer(i)}; result.view) {
|
||||
auto &view{result.view};
|
||||
executor.AttachBuffer(view);
|
||||
view->buffer->MarkGpuDirty();
|
||||
if (!result.wasCached) {
|
||||
boundTransformFeedbackBuffers->sizes[i] = view->view->size;
|
||||
view.RegisterUsage(executor.allocator, executor.cycle, [handle = boundTransformFeedbackBuffers->handles.data() + i, offset = boundTransformFeedbackBuffers->offsets.data() + i](const Buffer::BufferViewStorage &view, const std::shared_ptr<Buffer> &buffer) {
|
||||
*handle = buffer->GetBacking();
|
||||
*offset = view.offset;
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FillTransformFeedbackVaryingState();
|
||||
|
||||
// Color Render Target + Blending Setup
|
||||
boost::container::static_vector<TextureView *, maxwell3d::RenderTargetCount> activeColorRenderTargets;
|
||||
for (u32 index{}; index < maxwell3d::RenderTargetCount; index++) {
|
||||
@ -2979,7 +3147,7 @@ namespace skyline::gpu::interconnect {
|
||||
}
|
||||
|
||||
// Submit Draw
|
||||
executor.AddSubpass([=, drawStorage = std::move(drawStorage), pipelineLayout = compiledPipeline.pipelineLayout, pipeline = compiledPipeline.pipeline](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
|
||||
executor.AddSubpass([=, drawStorage = std::move(drawStorage), pipelineLayout = compiledPipeline.pipelineLayout, pipeline = compiledPipeline.pipeline, transformFeedbackEnabled = transformFeedbackEnabled](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &cycle, GPU &, vk::RenderPass renderPass, u32 subpassIndex) mutable {
|
||||
auto &vertexBufferHandles{boundVertexBuffers->handles};
|
||||
for (u32 bindingIndex{}; bindingIndex != vertexBufferHandles.size(); bindingIndex++) {
|
||||
// We need to bind all non-null vertex buffers while skipping any null ones
|
||||
@ -3004,12 +3172,23 @@ namespace skyline::gpu::interconnect {
|
||||
|
||||
commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline);
|
||||
|
||||
if (transformFeedbackEnabled) {
|
||||
for (u32 i{}; i < maxwell3d::TransformFeedbackBufferCount; i++)
|
||||
if (boundTransformFeedbackBuffers->handles[i])
|
||||
commandBuffer.bindTransformFeedbackBuffersEXT(i, span(boundTransformFeedbackBuffers->handles).subspan(i, 1), span(boundTransformFeedbackBuffers->offsets).subspan(i, 1), span(boundTransformFeedbackBuffers->sizes).subspan(i, 1));
|
||||
|
||||
commandBuffer.beginTransformFeedbackEXT(0, {}, {});
|
||||
}
|
||||
|
||||
if (IsIndexed || boundIndexBuffer) {
|
||||
commandBuffer.bindIndexBuffer(boundIndexBuffer->handle, boundIndexBuffer->offset, boundIndexBuffer->type);
|
||||
commandBuffer.drawIndexed(count, instanceCount, first, vertexOffset, 0);
|
||||
} else {
|
||||
commandBuffer.draw(count, instanceCount, first, 0);
|
||||
}
|
||||
|
||||
if (transformFeedbackEnabled)
|
||||
commandBuffer.endTransformFeedbackEXT(0, {}, {});
|
||||
}, renderArea, {}, activeColorRenderTargets, depthRenderTargetView, !gpu.traits.quirks.relaxedRenderPassCompatibility);
|
||||
}
|
||||
|
||||
|
@ -98,7 +98,7 @@ namespace skyline::gpu::memory {
|
||||
Buffer MemoryManager::AllocateBuffer(vk::DeviceSize size) {
|
||||
vk::BufferCreateInfo bufferCreateInfo{
|
||||
.size = size,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer,
|
||||
.usage = vk::BufferUsageFlagBits::eTransferSrc | vk::BufferUsageFlagBits::eTransferDst | vk::BufferUsageFlagBits::eUniformTexelBuffer | vk::BufferUsageFlagBits::eStorageTexelBuffer | vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eIndexBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndirectBuffer | vk::BufferUsageFlagBits::eTransformFeedbackBufferEXT,
|
||||
.sharingMode = vk::SharingMode::eExclusive,
|
||||
.queueFamilyIndexCount = 1,
|
||||
.pQueueFamilyIndices = &gpu.vkQueueFamilyIndex,
|
||||
|
@ -6,7 +6,7 @@
|
||||
|
||||
namespace skyline::gpu {
|
||||
TraitManager::TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice &physicalDevice) : quirks(deviceProperties2.get<vk::PhysicalDeviceProperties2>().properties, deviceProperties2.get<vk::PhysicalDeviceDriverProperties>()) {
|
||||
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{};
|
||||
bool hasCustomBorderColorExt{}, hasShaderAtomicInt64Ext{}, hasShaderFloat16Int8Ext{}, hasShaderDemoteToHelperExt{}, hasVertexAttributeDivisorExt{}, hasProvokingVertexExt{}, hasPrimitiveTopologyListRestartExt{}, hasImagelessFramebuffersExt{}, hasTransformFeedbackExt{};
|
||||
bool supportsUniformBufferStandardLayout{}; // We require VK_KHR_uniform_buffer_standard_layout but assume it is implicitly supported even when not present
|
||||
|
||||
for (auto &extension : deviceExtensions) {
|
||||
@ -56,6 +56,7 @@ namespace skyline::gpu {
|
||||
EXT_SET("VK_KHR_shader_float_controls", supportsFloatControls);
|
||||
EXT_SET("VK_KHR_uniform_buffer_standard_layout", supportsUniformBufferStandardLayout);
|
||||
EXT_SET("VK_EXT_primitive_topology_list_restart", hasPrimitiveTopologyListRestartExt);
|
||||
EXT_SET("VK_EXT_transform_feedback", hasTransformFeedbackExt);
|
||||
}
|
||||
|
||||
#undef EXT_SET
|
||||
@ -138,6 +139,20 @@ namespace skyline::gpu {
|
||||
enabledFeatures2.unlink<vk::PhysicalDeviceImagelessFramebufferFeatures>();
|
||||
}
|
||||
|
||||
FEAT_SET(vk::PhysicalDeviceFeatures2, features.geometryShader, supportsGeometryShaders)
|
||||
|
||||
if (hasTransformFeedbackExt) {
|
||||
bool hasTransformFeedbackFeat{}, hasGeometryStreamsStreamsFeat{};
|
||||
FEAT_SET(vk::PhysicalDeviceTransformFeedbackFeaturesEXT, transformFeedback, hasTransformFeedbackFeat)
|
||||
FEAT_SET(vk::PhysicalDeviceTransformFeedbackFeaturesEXT, geometryStreams, hasGeometryStreamsStreamsFeat)
|
||||
|
||||
auto transformFeedbackProperties{deviceProperties2.get<vk::PhysicalDeviceTransformFeedbackPropertiesEXT>()};
|
||||
if (hasTransformFeedbackFeat && hasGeometryStreamsStreamsFeat && transformFeedbackProperties.transformFeedbackDraw)
|
||||
supportsTransformFeedback = true;
|
||||
} else {
|
||||
enabledFeatures2.unlink<vk::PhysicalDeviceTransformFeedbackFeaturesEXT>();
|
||||
}
|
||||
|
||||
#undef FEAT_SET
|
||||
|
||||
if (supportsFloatControls)
|
||||
@ -166,8 +181,8 @@ namespace skyline::gpu {
|
||||
|
||||
std::string TraitManager::Summary() {
|
||||
return fmt::format(
|
||||
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Anisotropic Filtering: {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Subgroup Vote: {}\n* Subgroup Size: {}\n* BCn Support: {}",
|
||||
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsAnisotropicFiltering, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsSubgroupVote, subgroupSize, bcnSupport.to_string()
|
||||
"\n* Supports U8 Indices: {}\n* Supports Sampler Mirror Clamp To Edge: {}\n* Supports Sampler Reduction Mode: {}\n* Supports Custom Border Color (Without Format): {}\n* Supports Anisotropic Filtering: {}\n* Supports Last Provoking Vertex: {}\n* Supports Logical Operations: {}\n* Supports Vertex Attribute Divisor: {}\n* Supports Vertex Attribute Zero Divisor: {}\n* Supports Push Descriptors: {}\n* Supports Imageless Framebuffers: {}\n* Supports Global Priority: {}\n* Supports Multiple Viewports: {}\n* Supports Shader Viewport Index: {}\n* Supports SPIR-V 1.4: {}\n* Supports Shader Invocation Demotion: {}\n* Supports 16-bit FP: {}\n* Supports 8-bit Integers: {}\n* Supports 16-bit Integers: {}\n* Supports 64-bit Integers: {}\n* Supports Atomic 64-bit Integers: {}\n* Supports Floating Point Behavior Control: {}\n* Supports Image Read Without Format: {}\n* Supports List Primitive Topology Restart: {}\n* Supports Patch List Primitive Topology Restart: {}\n* Supports Transform Feedback: {}\n* Supports Geometry Shaders: {}\n * Supports Subgroup Vote: {}\n* Subgroup Size: {}\n* BCn Support: {}",
|
||||
supportsUint8Indices, supportsSamplerMirrorClampToEdge, supportsSamplerReductionMode, supportsCustomBorderColor, supportsAnisotropicFiltering, supportsLastProvokingVertex, supportsLogicOp, supportsVertexAttributeDivisor, supportsVertexAttributeZeroDivisor, supportsPushDescriptors, supportsImagelessFramebuffers, supportsGlobalPriority, supportsMultipleViewports, supportsShaderViewportIndexLayer, supportsSpirv14, supportsShaderDemoteToHelper, supportsFloat16, supportsInt8, supportsInt16, supportsInt64, supportsAtomicInt64, supportsFloatControls, supportsImageReadWithoutFormat, supportsTopologyListRestart, supportsTopologyPatchListRestart, supportsTransformFeedback, supportsGeometryShaders, supportsSubgroupVote, subgroupSize, bcnSupport.to_string()
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -35,9 +35,11 @@ namespace skyline::gpu {
|
||||
bool supportsAtomicInt64{}; //!< If atomic operations on 64-bit integers are supported in shaders
|
||||
bool supportsFloatControls{}; //!< If extensive control over FP behavior is exposed (with VK_KHR_shader_float_controls)
|
||||
vk::PhysicalDeviceFloatControlsProperties floatControls{}; //!< Specifics of FP behavior control (All members will be zero'd out when unavailable)
|
||||
bool supportsTransformFeedback{}; //!< If the 'VK_EXT_transform_feedback' extension is supported with the features necessary for emulation
|
||||
bool supportsImageReadWithoutFormat{}; //!< If a storage image can be read without a format
|
||||
bool supportsTopologyListRestart{}; //!< If the device supports using primitive restart for topology lists (with VK_EXT_primitive_topology_list_restart)
|
||||
bool supportsTopologyPatchListRestart{}; //!< If the device supports using primitive restart for topology patch lists (with VK_EXT_primitive_topology_list_restart)
|
||||
bool supportsGeometryShaders{}; //!< If the device supports the 'geometryShader' Vulkan feature
|
||||
bool supportsSubgroupVote{}; //!< If subgroup votes are supported in shaders with SPV_KHR_subgroup_vote
|
||||
u32 subgroupSize{}; //!< Size of a subgroup on the host GPU
|
||||
|
||||
@ -74,6 +76,7 @@ namespace skyline::gpu {
|
||||
vk::PhysicalDeviceProperties2,
|
||||
vk::PhysicalDeviceDriverProperties,
|
||||
vk::PhysicalDeviceFloatControlsProperties,
|
||||
vk::PhysicalDeviceTransformFeedbackPropertiesEXT,
|
||||
vk::PhysicalDeviceSubgroupProperties>;
|
||||
|
||||
using DeviceFeatures2 = vk::StructureChain<
|
||||
@ -87,7 +90,8 @@ namespace skyline::gpu {
|
||||
vk::PhysicalDeviceShaderDrawParametersFeatures,
|
||||
vk::PhysicalDeviceProvokingVertexFeaturesEXT,
|
||||
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT,
|
||||
vk::PhysicalDeviceImagelessFramebufferFeatures>;
|
||||
vk::PhysicalDeviceImagelessFramebufferFeatures,
|
||||
vk::PhysicalDeviceTransformFeedbackFeaturesEXT>;
|
||||
|
||||
TraitManager(const DeviceFeatures2 &deviceFeatures2, DeviceFeatures2 &enabledFeatures2, const std::vector<vk::ExtensionProperties> &deviceExtensions, std::vector<std::array<char, VK_MAX_EXTENSION_NAME_SIZE>> &enabledExtensions, const DeviceProperties2 &deviceProperties2, const vk::raii::PhysicalDevice& physicalDevice);
|
||||
|
||||
|
Reference in New Issue
Block a user