mirror of
https://github.com/Takiiiiiiii/strato.git
synced 2025-07-17 08:46:39 +00:00
Implement accelerated uploads/copies through buffer manager
Previously, both I2M uploads and DMA copies would force GPU serialisation if they happened to hit a trap or were used to copy GPU dirty buffers. By using the buffer manager to implement them on the host GPU we can avoid such slowdowns entirely.
This commit is contained in:
50
app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp
Normal file
50
app/src/main/cpp/skyline/gpu/interconnect/inline2memory.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu/buffer_manager.h>
|
||||
#include <soc/gm20b/gmmu.h>
|
||||
#include <soc/gm20b/channel.h>
|
||||
#include "inline2memory.h"
|
||||
|
||||
namespace skyline::gpu::interconnect {
|
||||
using IOVA = soc::gm20b::IOVA;
|
||||
|
||||
/**
 * @brief Binds this I2M interconnect to a GPU instance and a channel
 * @param gpu GPU instance, kept by reference
 * @param channelCtx Channel context, kept by reference; a reference to its `executor` member is cached as well
 */
Inline2Memory::Inline2Memory(GPU &gpu, soc::gm20b::ChannelContext &channelCtx)
    : gpu(gpu), channelCtx(channelCtx), executor(channelCtx.executor) {}
|
||||
|
||||
/**
 * @brief Uploads inline data into the buffer backing a GPU virtual address through the buffer manager
 * @param dst GPU virtual address that the data should be written to
 * @param src Words to upload, `src.size_bytes()` bytes are translated and written
 * @note Only the first mapping of the translated range is used, a split range is logged and the remaining mappings are ignored
 */
void Inline2Memory::Upload(IOVA dst, span<u32> src) {
    auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, src.size_bytes())};

    // Calling front() on an empty translation result would be undefined behaviour and there is nothing to upload into
    if (dstMappings.empty())
        return;

    if (dstMappings.size() > 1)
        Logger::Warn("Split mappings are unsupported for I2M uploads");

    // Look up (or create) the buffer view covering the first mapping, any buffer locked during the lookup is attached to the executor
    auto dstBuf{gpu.buffer.FindOrCreate(dstMappings.front(), executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
        executor.AttachLockedBuffer(buffer, std::move(lock));
    })};
    ContextLock dstBufLock{executor.tag, dstBuf};

    // The callback moves out of `dstBufLock` by reference, so it is presumably only run synchronously inside Write() when the write has to be done on the GPU - TODO confirm against Buffer::Write
    dstBuf.Write(src.cast<u8>(), 0, [&]() {
        executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
        // This will prevent any CPU accesses to backing for the duration of the usage
        dstBuf.GetBuffer()->BlockAllCpuBackingWrites();

        // Stage the data in the megabuffer so the recorded command never needs to touch `src` itself
        auto srcGpuAllocation{gpu.megaBufferAllocator.Push(executor.cycle, src.cast<u8>())};

        // Only the byte count is captured as the memory `src` points to isn't needed by the recorded command
        executor.AddOutsideRpCommand([srcGpuAllocation, dstBuf, copySize = src.size_bytes()](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
            // Designators follow the declaration order of VkBufferCopy (srcOffset, dstOffset, size)
            vk::BufferCopy copyRegion{
                .srcOffset = srcGpuAllocation.offset,
                .dstOffset = dstBuf.GetOffset(),
                .size = copySize
            };
            commandBuffer.copyBuffer(srcGpuAllocation.buffer, dstBuf.GetBuffer()->GetBacking(), copyRegion);

            // Make the transfer write visible to any reads/writes by commands recorded afterwards
            commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
                .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite
            }, {}, {});
        });
    });
}
|
||||
}
|
36
app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h
Normal file
36
app/src/main/cpp/skyline/gpu/interconnect/inline2memory.h
Normal file
@ -0,0 +1,36 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <soc/gm20b/gmmu.h>
|
||||
|
||||
namespace skyline::gpu {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
struct ChannelContext;
|
||||
}
|
||||
|
||||
namespace skyline::gpu::interconnect {
|
||||
class CommandExecutor;
|
||||
|
||||
/**
|
||||
* @brief Handles translating I2M operations to Vulkan
|
||||
*/
|
||||
class Inline2Memory {
|
||||
private:
|
||||
using IOVA = soc::gm20b::IOVA;
|
||||
|
||||
GPU &gpu;
|
||||
soc::gm20b::ChannelContext &channelCtx;
|
||||
gpu::interconnect::CommandExecutor &executor;
|
||||
|
||||
public:
|
||||
Inline2Memory(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
||||
|
||||
void Upload(IOVA dst, span<u32> src);
|
||||
};
|
||||
}
|
61
app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
Normal file
61
app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#include <gpu/buffer_manager.h>
|
||||
#include <soc/gm20b/gmmu.h>
|
||||
#include <soc/gm20b/channel.h>
|
||||
#include "maxwell_dma.h"
|
||||
|
||||
namespace skyline::gpu::interconnect {
|
||||
using IOVA = soc::gm20b::IOVA;
|
||||
|
||||
/**
 * @brief Binds this Maxwell DMA interconnect to a GPU instance and a channel
 * @param gpu GPU instance, kept by reference
 * @param channelCtx Channel context, kept by reference; a reference to its `executor` member is cached as well
 */
MaxwellDma::MaxwellDma(GPU &gpu, soc::gm20b::ChannelContext &channelCtx)
    : gpu(gpu), channelCtx(channelCtx), executor(channelCtx.executor) {}
|
||||
|
||||
/**
 * @brief Copies data between the buffers backing two GPU virtual address ranges through the buffer manager
 * @param dst GPU virtual address of the destination range
 * @param src GPU virtual address of the source range
 * @param size Amount of bytes that both ranges are translated with
 * @note Only the first mapping of each translated range is used, split ranges are logged and the remaining mappings are ignored
 * @note The recorded Vulkan copy uses the size of the source buffer view
 */
void MaxwellDma::Copy(IOVA dst, IOVA src, size_t size) {
    auto srcMappings{channelCtx.asCtx->gmmu.TranslateRange(src, size)};
    auto dstMappings{channelCtx.asCtx->gmmu.TranslateRange(dst, size)};

    // Calling front() on an empty translation result would be undefined behaviour and there is nothing to copy
    if (srcMappings.empty() || dstMappings.empty())
        return;

    if (srcMappings.size() > 1 || dstMappings.size() > 1)
        Logger::Warn("Split mapping are unsupported for DMA copies");

    // Look up (or create) the views covering the first mapping of each range, any buffer locked during a lookup is attached to the executor
    auto srcBuf{gpu.buffer.FindOrCreate(srcMappings.front(), executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
        executor.AttachLockedBuffer(buffer, std::move(lock));
    })};
    ContextLock srcBufLock{executor.tag, srcBuf};

    auto dstBuf{gpu.buffer.FindOrCreate(dstMappings.front(), executor.tag, [this](std::shared_ptr<Buffer> buffer, ContextLock<Buffer> &&lock) {
        executor.AttachLockedBuffer(buffer, std::move(lock));
    })};
    ContextLock dstBufLock{executor.tag, dstBuf};

    // The callback moves out of both locks by reference, so it is presumably only run synchronously inside CopyFrom() when the copy has to be done on the GPU - TODO confirm against BufferView::CopyFrom
    dstBuf.CopyFrom(srcBuf, [&]() {
        executor.AttachLockedBufferView(srcBuf, std::move(srcBufLock));
        executor.AttachLockedBufferView(dstBuf, std::move(dstBufLock));
        // This will prevent any CPU accesses to backing for the duration of the usage
        // GPU dirtiness will be handled on the CopyFrom end as it's not always necessary
        srcBuf.GetBuffer()->BlockAllCpuBackingWrites();
        dstBuf.GetBuffer()->BlockAllCpuBackingWrites();

        executor.AddOutsideRpCommand([srcBuf, dstBuf](vk::raii::CommandBuffer &commandBuffer, const std::shared_ptr<FenceCycle> &, GPU &) {
            // Order the transfer after all previously recorded commands
            // NOTE(review): srcAccessMask only lists a read access, the Vulkan spec requires a write access bit here for prior writes to be made available to the transfer - confirm that earlier writes are already covered by barriers recorded elsewhere
            commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eAllCommands, vk::PipelineStageFlagBits::eTransfer, {}, vk::MemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eMemoryRead,
                .dstAccessMask = vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eTransferWrite
            }, {}, {});

            // Designators follow the declaration order of VkBufferCopy (srcOffset, dstOffset, size)
            vk::BufferCopy copyRegion{
                .srcOffset = srcBuf.GetOffset(),
                .dstOffset = dstBuf.GetOffset(),
                .size = srcBuf.size
            };
            commandBuffer.copyBuffer(srcBuf.GetBuffer()->GetBacking(), dstBuf.GetBuffer()->GetBacking(), copyRegion);

            // Make the transfer write visible to any reads/writes by commands recorded afterwards
            commandBuffer.pipelineBarrier(vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eAllCommands, {}, vk::MemoryBarrier{
                .srcAccessMask = vk::AccessFlagBits::eTransferWrite,
                .dstAccessMask = vk::AccessFlagBits::eMemoryRead | vk::AccessFlagBits::eMemoryWrite,
            }, {}, {});
        });
    });
}
|
||||
}
|
36
app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h
Normal file
36
app/src/main/cpp/skyline/gpu/interconnect/maxwell_dma.h
Normal file
@ -0,0 +1,36 @@
|
||||
// SPDX-License-Identifier: MPL-2.0
|
||||
// Copyright © 2022 Ryujinx Team and Contributors (https://github.com/ryujinx/)
|
||||
// Copyright © 2022 Skyline Team and Contributors (https://github.com/skyline-emu/)
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <soc/gm20b/gmmu.h>
|
||||
|
||||
namespace skyline::gpu {
|
||||
class GPU;
|
||||
}
|
||||
|
||||
namespace skyline::soc::gm20b {
|
||||
struct ChannelContext;
|
||||
}
|
||||
|
||||
namespace skyline::gpu::interconnect {
|
||||
class CommandExecutor;
|
||||
|
||||
/**
|
||||
* @brief Handles translating Maxwell DMA operations to Vulkan
|
||||
*/
|
||||
class MaxwellDma {
|
||||
private:
|
||||
using IOVA = soc::gm20b::IOVA;
|
||||
|
||||
GPU &gpu;
|
||||
soc::gm20b::ChannelContext &channelCtx;
|
||||
gpu::interconnect::CommandExecutor &executor;
|
||||
|
||||
public:
|
||||
MaxwellDma(GPU &gpu, soc::gm20b::ChannelContext &channelCtx);
|
||||
|
||||
void Copy(IOVA dst, IOVA src, size_t size);
|
||||
};
|
||||
}
|
Reference in New Issue
Block a user