Introduce chunked MegaBuffer allocation
After the introduction of workahead, a system was implemented that held a single large megabuffer per submission. This worked fine in most cases, but when many submissions were in flight at the same time, memory usage would increase dramatically due to the number of megabuffers needed. Since only one megabuffer was allowed per execution, the buffer had to be fairly large in order to accommodate the upper bound, increasing memory usage even further.

This commit fixes that memory usage issue by allowing multiple megabuffers to be allocated per execution, as well as reused across executions. Allocations now go through a global allocator object that chooses which chunk to allocate into on a per-allocation basis; if all chunks are in use by the GPU, another chunk is allocated, which can then be reused for future allocations too. This reduces Hollow Knight's megabuffer memory usage by a factor of 4 and SMO's by even more.
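For context, the flow a consumer follows with the new allocator looks roughly like this (a minimal sketch, not part of the commit; the function itself and RecordDraw are hypothetical, while AcquireMegaBuffer, Push, GetBacking and ReplaceCycle are the methods introduced in the diff below):

    // Hypothetical consumer of the chunked megabuffer allocator; assumes the
    // skyline::gpu types (BufferManager, MegaBuffer, FenceCycle) from the diff below
    void UploadAndDraw(skyline::gpu::BufferManager &bufferManager,
                       const std::shared_ptr<skyline::gpu::FenceCycle> &cycle,
                       skyline::span<skyline::u8> vertexData) {
        // Picks a chunk whose previous GPU work has finished, or allocates a new one
        auto megaBuffer{bufferManager.AcquireMegaBuffer(cycle)};

        // Copies the data into the chunk and returns the offset to bind at (page-aligned here)
        vk::DeviceSize offset{megaBuffer.Push(vertexData, true)};

        RecordDraw(megaBuffer.GetBacking(), offset); // Hypothetical command recording

        // When megaBuffer is destroyed, the slot's `active` flag is cleared, but the
        // slot is only handed out again once `cycle` has signalled on the GPU
    }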
@@ -5,7 +5,7 @@
 #include "buffer_manager.h"

 namespace skyline::gpu {
-    BufferManager::BufferManager(GPU &gpu) : gpu(gpu) {}
+    BufferManager::BufferManager(GPU &gpu) : gpu{gpu} {}

     bool BufferManager::BufferLessThan(const std::shared_ptr<Buffer> &it, u8 *pointer) {
         return it->guest->begin().base() < pointer;
@@ -209,78 +209,4 @@ namespace skyline::gpu {
        return buffer->GetView(static_cast<vk::DeviceSize>(guestMapping.begin() - buffer->guest->begin()) + offset, size);
    }

    constexpr static vk::DeviceSize MegaBufferSize{100 * 1024 * 1024}; //!< Size in bytes of the megabuffer (100MiB)

    BufferManager::MegaBufferSlot::MegaBufferSlot(GPU &gpu) : backing(gpu.memory.AllocateBuffer(MegaBufferSize)) {}

    MegaBuffer::MegaBuffer(BufferManager::MegaBufferSlot &slot) : slot{&slot}, freeRegion{slot.backing.subspan(PAGE_SIZE)} {}

    MegaBuffer::~MegaBuffer() {
        if (slot)
            slot->active.clear(std::memory_order_release);
    }

    MegaBuffer &MegaBuffer::operator=(MegaBuffer &&other) {
        if (slot)
            slot->active.clear(std::memory_order_release);
        slot = other.slot;
        freeRegion = other.freeRegion;
        other.slot = nullptr;
        return *this;
    }

    bool MegaBuffer::WasUsed() {
        return freeRegion != slot->backing.subspan(PAGE_SIZE);
    }

    void MegaBuffer::ReplaceCycle(const std::shared_ptr<FenceCycle> &cycle) {
        slot->cycle = cycle;
    }

    void MegaBuffer::Reset() {
        freeRegion = slot->backing.subspan(PAGE_SIZE);
    }

    vk::Buffer MegaBuffer::GetBacking() const {
        return slot->backing.vkBuffer;
    }

    vk::DeviceSize MegaBuffer::Push(span<u8> data, bool pageAlign) {
        if (data.size() > freeRegion.size())
            throw exception("Ran out of megabuffer space! Alloc size: 0x{:X}", data.size());

        if (pageAlign) {
            // If page aligned data was requested then align the free region up to the next page boundary
            auto alignedFreeBase{util::AlignUp(static_cast<size_t>(freeRegion.data() - slot->backing.data()), PAGE_SIZE)};
            freeRegion = slot->backing.subspan(alignedFreeBase);
        }

        // Allocate space for data from the free region
        auto resultSpan{freeRegion.subspan(0, data.size())};
        resultSpan.copy_from(data);

        // Move the free region along
        freeRegion = freeRegion.subspan(data.size());
        return static_cast<vk::DeviceSize>(resultSpan.data() - slot->backing.data());
    }

    MegaBuffer BufferManager::AcquireMegaBuffer(const std::shared_ptr<FenceCycle> &cycle) {
        std::scoped_lock lock{megaBufferMutex};

        for (auto &slot : megaBuffers) {
            if (!slot.active.test_and_set(std::memory_order_acq_rel)) {
                if (slot.cycle->Poll()) {
                    slot.cycle = cycle;
                    return {slot};
                } else {
                    slot.active.clear(std::memory_order_release);
                }
            }
        }

        auto &megaBuffer{megaBuffers.emplace_back(gpu)};
        megaBuffer.cycle = cycle;
        return {megaBuffer};
    }
}
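One design point worth spelling out, inferable from the code above rather than stated in the commit message: the per-slot `active` flag is atomic because ~MegaBuffer releases its slot without taking `megaBufferMutex`. `AcquireMegaBuffer` only recycles a slot when both the `test_and_set` claim succeeds and the slot's `FenceCycle` polls as signalled, so a chunk is never handed out for reuse while the GPU may still be reading from it; if every chunk is still pending, the pool simply grows by one.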