Add dynamic executor slot growth

2025-07-17 08:46:39 +00:00 · 2022-11-19 18:11:24 +00:00
parent 60169fce4c
commit 579a2d9337
4 changed files with 27 additions and 8 deletions
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.cpp
@ -55,8 +55,13 @@ namespace skyline::gpu::interconnect {
          ready{other.ready} {}

    std::shared_ptr<FenceCycle> CommandRecordThread::Slot::Reset(GPU &gpu) {
+        auto startTime{util::GetTimeNs()}; // Timestamp taken before waiting so the time spent in this reset can be measured
+
        cycle->Wait(); // Wait on the slot's current cycle before it is replaced below
        cycle = std::make_shared<FenceCycle>(*cycle); // Swap in a new FenceCycle constructed from the previous one
+        if (util::GetTimeNs() - startTime > GrowThresholdNs) // The measured span covers both the wait and the FenceCycle construction above
+            didWait = true; // Flags this slot; the record thread checks this flag when deciding whether to add another slot
+
        // Command buffer doesn't need to be reset since that's done implicitly by begin
        return cycle;
    }
@ -126,10 +131,7 @@ namespace skyline::gpu::interconnect {
                Logger::Warn("Failed to intialise RenderDoc API: {}", ret);
        }

-        std::vector<Slot> slots{};
-        std::generate_n(std::back_inserter(slots), (1U << *state.settings->executorSlotCountScale), [&] () -> Slot { return gpu; });
-
-        outgoing.AppendTranform(span<Slot>(slots), [](auto &slot) { return &slot; });
+        outgoing.Push(&slots.emplace_back(gpu));

        if (int result{pthread_setname_np(pthread_self(), "Sky-CmdRecord")})
            Logger::Warn("Failed to set the thread name: {}", strerror(result));
@ -148,6 +150,11 @@ namespace skyline::gpu::interconnect {
                    renderDocApi->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance), nullptr);
                slot->capture = false;

+                if (slot->didWait && slots.size() < (1U << *state.settings->executorSlotCountScale)) {
+                    outgoing.Push(&slots.emplace_back(gpu));
+                    slot->didWait = false;
+                }
+
                outgoing.Push(slot);
            }, [] {});
        } catch (const signal::SignalException &e) {
@ -166,7 +173,12 @@ namespace skyline::gpu::interconnect {
    }

    CommandRecordThread::Slot *CommandRecordThread::AcquireSlot() {
-        return outgoing.Pop();
+        auto startTime{util::GetTimeNs()}; // Timestamp taken before popping so the time spent obtaining a slot can be measured
+        auto slot{outgoing.Pop()}; // NOTE(review): presumably blocks until a slot is available in 'outgoing' - confirm against CircularQueue::Pop
+        if (util::GetTimeNs() - startTime > GrowThresholdNs) // Obtaining the slot took longer than the growth threshold
+            slot->didWait = true; // Recorded on the slot itself; the record thread checks this flag when deciding whether to add another slot
+
+        return slot;
    }

    void CommandRecordThread::ReleaseSlot(Slot *slot) {
--- a/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
+++ b/app/src/main/cpp/skyline/gpu/interconnect/command_executor.h
@ -42,6 +42,7 @@ namespace skyline::gpu::interconnect {
            u32 executionNumber;
            bool ready{}; //!< If this slot's command buffer has had 'beginCommandBuffer' called and is ready to have commands recorded into it
            bool capture{}; //!< If this slot's Vulkan commands should be captured using the renderdoc API
+            bool didWait{}; //!< If a wait longer than GrowThresholdNs occurred while this slot was being acquired or reset

            Slot(GPU &gpu);

@ -62,9 +63,11 @@ namespace skyline::gpu::interconnect {
        };

      private:
+        static constexpr size_t GrowThresholdNs{constant::NsInMillisecond / 4}; //!< The wait time threshold at which the slot count will be increased
        const DeviceState &state;
        CircularQueue<Slot *> incoming; //!< Slots pending recording
        CircularQueue<Slot *> outgoing; //!< Slots that have been submitted, may still be active on the GPU
+        std::list<Slot> slots; //!< Storage for every slot; pointers to its elements are pushed into the queues, std::list keeps those pointers valid as more slots are emplaced

        std::thread thread;