Complete making the kernel thread-safe #2 + Fix Shared Memory Implementation

This commit makes the kernel completely thread-safe and fixes an issue that caused libNX games to not work due to an error with KSharedMemory. In addition, it implements GroupMutex, which allows the kernel threads to run in parallel with each other while still preventing them from overlapping with the JNI thread.
2025-07-17 08:46:39 +00:00 · 2020-01-11 10:22:25 +05:30
parent de6d8d8f48
commit 65018aedbc
39 changed files with 547 additions and 932 deletions
--- a/app/src/main/cpp/skyline/gpu/display.cpp
+++ b/app/src/main/cpp/skyline/gpu/display.cpp
@ -17,7 +17,7 @@ namespace skyline::gpu {
            if (!nvBuffer)
                throw exception("A QueueBuffer request has an invalid NVMap Handle ({}) and ID ({})", gbpBuffer.nvmapHandle, gbpBuffer.nvmapId);
        }
-        switch(gbpBuffer.format) {
+        switch (gbpBuffer.format) {
            case WINDOW_FORMAT_RGBA_8888:
            case WINDOW_FORMAT_RGBX_8888:
                bpp = sizeof(u32);
@ -34,10 +34,6 @@ namespace skyline::gpu {
        state.process->ReadMemory(dataBuffer.data(), nvBuffer->address + gbpBuffer.offset, gbpBuffer.size);
    }

-    BufferQueue::WaitContext::WaitContext(std::shared_ptr<kernel::type::KThread> thread, DequeueIn input, kernel::ipc::OutputBuffer& buffer) : thread(std::move(thread)), input(input), buffer(buffer) {}
-
-    BufferQueue::DequeueOut::DequeueOut(u32 slot) : slot(slot), _unk0_(0x1), _unk1_(0x24) {}
-
    BufferQueue::BufferQueue(const DeviceState &state) : state(state) {}

    void BufferQueue::RequestBuffer(Parcel &in, Parcel &out) {
@ -50,9 +46,15 @@ namespace skyline::gpu {
    }

    void BufferQueue::DequeueBuffer(Parcel &in, Parcel &out) {
-        auto *data = reinterpret_cast<DequeueIn *>(in.data.data() + constant::TokenLength);
+        struct Data {
+            u32 format;
+            u32 width;
+            u32 height;
+            u32 timestamps;
+            u32 usage;
+        } *data = reinterpret_cast<Data *>(in.data.data() + constant::TokenLength);
        i64 slot{-1};
-        while(slot == -1) {
+        while (slot == -1) {
            for (auto &buffer : queue) {
                if (buffer.second->status == BufferStatus::Free && buffer.second->resolution.width == data->width && buffer.second->resolution.height == data->height && buffer.second->gbpBuffer.usage == data->usage) {
                    slot = buffer.first;
@ -62,7 +64,12 @@ namespace skyline::gpu {
            }
            sched_yield();
        }
-        DequeueOut output(static_cast<u32>(slot));
+        struct {
+            u32 slot;
+            u32 _unk_[13];
+        } output{
+            .slot = static_cast<u32>(slot)
+        };
        out.WriteData(output);
        state.logger->Debug("DequeueBuffer: Width: {}, Height: {}, Format: {}, Usage: {}, Timestamps: {}, Slot: {}", data->width, data->height, data->format, data->usage, data->timestamps, slot);
    }
@ -118,28 +125,19 @@ namespace skyline::gpu {
        auto gbpBuffer = reinterpret_cast<GbpBuffer *>(pointer);
        queue[data->slot] = std::make_shared<Buffer>(state, data->slot, *gbpBuffer);
        state.gpu->bufferEvent->Signal();
-        state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}, Size: 0x{:X}", data->slot, gbpBuffer->magic, gbpBuffer->width, gbpBuffer->height, gbpBuffer->stride, gbpBuffer->format, gbpBuffer->usage, gbpBuffer->index,gbpBuffer->nvmapId, gbpBuffer->nvmapHandle, gbpBuffer->offset, (1U << gbpBuffer->blockHeightLog2), gbpBuffer->size);
-    }
-
-    void BufferQueue::FreeBuffer(u32 slotNo) {
-        auto &slot = queue.at(slotNo);
-        if (waitVec.empty())
-            slot->status = BufferStatus::Free;
-        else {
-            auto context = waitVec.begin();
-            while (context != waitVec.end()) {
-                if (slot->resolution.width == context->input.width && slot->resolution.height == context->input.height && slot->gbpBuffer.usage == context->input.usage) {
-                    context->thread->WakeUp();
-                    gpu::Parcel out(state);
-                    DequeueOut output(slotNo);
-                    out.WriteData(output);
-                    out.WriteParcel(context->buffer);
-                    slot->status = BufferStatus::Dequeued;
-                    waitVec.erase(context);
-                    break;
-                }
-                context++;
-            }
-        }
+        state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}, Size: 0x{:X}",
+                            data->slot,
+                            gbpBuffer->magic,
+                            gbpBuffer->width,
+                            gbpBuffer->height,
+                            gbpBuffer->stride,
+                            gbpBuffer->format,
+                            gbpBuffer->usage,
+                            gbpBuffer->index,
+                            gbpBuffer->nvmapId,
+                            gbpBuffer->nvmapHandle,
+                            gbpBuffer->offset,
+                            (1U << gbpBuffer->blockHeightLog2),
+                            gbpBuffer->size);
    }
 }
--- a/app/src/main/cpp/skyline/gpu/display.h
+++ b/app/src/main/cpp/skyline/gpu/display.h
@ -13,11 +13,11 @@ namespace skyline::gpu {
        u32 width; //!< The width component of the resolution
        u32 height; //!< The height component of the resolution

-        bool operator==(const Resolution &r) {
+        inline bool operator==(const Resolution &r) {
            return (width == r.width) && (height == r.height);
        }

-        bool operator!=(const Resolution &r) {
+        inline bool operator!=(const Resolution &r) {
            return !operator==(r);
        }
    };
@ -58,8 +58,7 @@ namespace skyline::gpu {
    enum class BufferStatus {
        Free,
        Dequeued,
-        Queued,
-        Acquired
+        Queued
    };

    /**
@ -106,7 +105,6 @@ namespace skyline::gpu {
        GbpBuffer gbpBuffer; //!< The information about the underlying buffer
        BufferStatus status{BufferStatus::Free}; //!< The status of this buffer
        std::vector<u8> dataBuffer; //!< The vector holding the actual pixel data
-        std::vector<u8> swizzBuffer; //!< The vector holding the swizzled pixel data
        std::shared_ptr<device::NvMap::NvMapObject> nvBuffer{}; //!< A shared pointer to the buffer's nvmap object

        /**
@ -123,55 +121,12 @@ namespace skyline::gpu {
    };

    /**
-     * @brief This holds the state of all the buffers used by the guest application
+     * @brief This is used to manage and queue up all display buffers to be shown
     */
    class BufferQueue {
      private:
        const DeviceState &state; //!< The state of the device

-        /**
-         * @brief This is the input struct for DequeueBuffer
-         */
-        struct DequeueIn {
-            u32 format;
-            u32 width;
-            u32 height;
-            u32 timestamps;
-            u32 usage;
-        };
-
-        /**
-         * @brief This is the output struct for DequeueBuffer
-         */
-        struct DequeueOut {
-            u32 slot; //!< The slot of the dequeued buffer
-            u32 _unk0_;
-            u32 _unk1_;
-            u32 _unk2_[11]{};
-
-            /**
-             * @param slot The slot of the dequeued buffer
-             */
-            DequeueOut(u32 slot);
-        };
-
-        /**
-         * @brief This holds the context of a thread waiting on a buffer
-         */
-        struct WaitContext {
-            std::shared_ptr<kernel::type::KThread> thread; //!< The thread that is waiting on a buffer
-            DequeueIn input; //!< The input of DequeueBuffer
-            kernel::ipc::OutputBuffer buffer; //!< The output buffer to write the parcel into
-
-            /**
-             * @param thread The thread that is waiting on a buffer
-             * @param input The input of DequeueBuffer
-             * @param buffer The output buffer to write the parcel into
-             */
-            WaitContext(std::shared_ptr<kernel::type::KThread> thread, DequeueIn input, kernel::ipc::OutputBuffer& buffer);
-        };
-        std::vector<WaitContext> waitVec; //!< A vector of shared pointers to threads waiting on a buffer
-
      public:
        std::unordered_map<u32, std::shared_ptr<Buffer>> queue; //!< A vector of shared pointers to all the queued buffers
        std::queue<std::shared_ptr<Buffer>> displayQueue; //!< A queue of all the buffers to be posted to the display
@ -210,6 +165,8 @@ namespace skyline::gpu {
         * @brief This frees a buffer which is currently queued
         * @param slotNo The slot of the buffer
         */
-        void FreeBuffer(u32 slotNo);
+        inline void FreeBuffer(u32 slotNo) {
+            queue.at(slotNo)->status = BufferStatus::Free;
+        }
    };
 }
--- a/app/src/main/cpp/skyline/gpu/parcel.cpp
+++ b/app/src/main/cpp/skyline/gpu/parcel.cpp
@ -17,7 +17,7 @@ namespace skyline::gpu {

    Parcel::Parcel(const DeviceState &state) : state(state) {}

-    u64 Parcel::WriteParcel(kernel::ipc::OutputBuffer& buffer) {
+    u64 Parcel::WriteParcel(kernel::ipc::OutputBuffer &buffer) {
        return WriteParcel(buffer.address, buffer.size);
    }

--- a/app/src/main/cpp/skyline/gpu/parcel.h
+++ b/app/src/main/cpp/skyline/gpu/parcel.h
@ -82,7 +82,7 @@ namespace skyline::gpu {
         * @param buffer The buffer to write into
         * @return The total size of the message
         */
-        u64 WriteParcel(kernel::ipc::OutputBuffer& buffer);
+        u64 WriteParcel(kernel::ipc::OutputBuffer &buffer);

        /**
         * @brief Writes the Parcel object into the process's memory