Optimize deswizzling implementation and support multiple formats

The deswizzling implementation currently writes linearly and reads non-linearly, this is non optimal as the MMU cannot read ahead. This flips that and reads linearly while it writes non-linearly. This is based on: 324a3624ac/nx/source/display/framebuffer.c (L189).
This commit is contained in:
◱ PixelyIon
2019-11-22 22:23:36 +05:30
committed by ◱ PixelyIon
parent e11d7d9ce0
commit 745cb208a6
4 changed files with 50 additions and 39 deletions

View File

@ -17,6 +17,17 @@ namespace skyline::gpu {
if (!nvBuffer)
throw exception("A QueueBuffer request has an invalid NVMap Handle ({}) and ID ({})", gbpBuffer.nvmapHandle, gbpBuffer.nvmapId);
}
switch(gbpBuffer.format) {
case WINDOW_FORMAT_RGBA_8888:
case WINDOW_FORMAT_RGBX_8888:
bpp = sizeof(u32);
break;
case WINDOW_FORMAT_RGB_565:
bpp = sizeof(u16);
break;
default:
throw exception("Unknown pixel format used for FB");
}
}
void Buffer::UpdateBuffer() {
@ -31,19 +42,18 @@ namespace skyline::gpu {
void BufferQueue::RequestBuffer(Parcel &in, Parcel &out) {
u32 slot = *reinterpret_cast<u32 *>(in.data.data() + constant::TokenLength);
auto buffer = queue.at(slot);
out.WriteData<u32>(1);
out.WriteData<u32>(sizeof(GbpBuffer));
out.WriteData<u32>(0);
out.WriteData(buffer->gbpBuffer);
state.logger->Debug("RequestBuffer: Slot: {}, Size: {}", slot, sizeof(GbpBuffer));
out.WriteData(queue.at(slot)->gbpBuffer);
state.logger->Debug("RequestBuffer: Slot: {}", slot, sizeof(GbpBuffer));
}
bool BufferQueue::DequeueBuffer(Parcel &in, Parcel &out, u64 address, u64 size) {
auto *data = reinterpret_cast<DequeueIn *>(in.data.data() + constant::TokenLength);
i64 slot{-1};
for (auto &buffer : queue) {
if (buffer.second->status == BufferStatus::Free && buffer.second->resolution.width == data->width && buffer.second->resolution.height == data->height && buffer.second->gbpBuffer.format == data->format && buffer.second->gbpBuffer.usage == data->usage) {
if (buffer.second->status == BufferStatus::Free && buffer.second->resolution.width == data->width && buffer.second->resolution.height == data->height && buffer.second->gbpBuffer.usage == data->usage) {
slot = buffer.first;
buffer.second->status = BufferStatus::Dequeued;
}
@ -51,7 +61,7 @@ namespace skyline::gpu {
if (slot == -1) {
state.thisThread->Sleep();
waitVec.emplace_back(state.thisThread, *data, address, size);
state.logger->Debug("DequeueBuffer: No Free Buffers");
state.logger->Debug("DequeueBuffer: Width: {}, Height: {}, Format: {}, Usage: {}, Timestamps: {}, No Free Buffers", data->width, data->height, data->format, data->usage, data->timestamps);
return true;
}
DequeueOut output(static_cast<u32>(slot));
@ -86,8 +96,7 @@ namespace skyline::gpu {
.height = buffer->gbpBuffer.height
};
out.WriteData(output);
state.logger->Debug("QueueBuffer: Timestamp: {}, Auto Timestamp: {}, Crop: [T: {}, B: {}, L: {}, R: {}], Scaling Mode: {}, Transform: {}, Sticky Transform: {}, Swap Interval: {}, Slot: {}", data->timestamp, data->autoTimestamp, data->crop.top, data->crop.bottom, data->crop.left, data->crop.right, data->scalingMode, data->transform, data->stickyTransform, data->swapInterval,
data->slot);
state.logger->Debug("QueueBuffer: Timestamp: {}, Auto Timestamp: {}, Crop: [T: {}, B: {}, L: {}, R: {}], Scaling Mode: {}, Transform: {}, Sticky Transform: {}, Swap Interval: {}, Slot: {}", data->timestamp, data->autoTimestamp, data->crop.top, data->crop.bottom, data->crop.left, data->crop.right, data->scalingMode, data->transform, data->stickyTransform, data->swapInterval, data->slot);
}
void BufferQueue::CancelBuffer(Parcel &parcel) {
@ -96,6 +105,7 @@ namespace skyline::gpu {
Fence fence[4];
} *data = reinterpret_cast<Data *>(parcel.data.data() + constant::TokenLength);
FreeBuffer(data->slot);
state.logger->Debug("CancelBuffer: Slot: {}", data->slot);
}
void BufferQueue::SetPreallocatedBuffer(Parcel &parcel) {
@ -110,8 +120,7 @@ namespace skyline::gpu {
auto gbpBuffer = reinterpret_cast<GbpBuffer *>(pointer);
queue[data->slot] = std::make_shared<Buffer>(state, data->slot, *gbpBuffer);
state.gpu->bufferEvent->Signal();
state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Length: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}", data->slot, data->length, gbpBuffer->magic, gbpBuffer->width, gbpBuffer->height, gbpBuffer->stride, gbpBuffer->format, gbpBuffer->usage, gbpBuffer->index, gbpBuffer->nvmapId,
gbpBuffer->nvmapHandle, gbpBuffer->offset, (1U << gbpBuffer->blockHeightLog2));
state.logger->Debug("SetPreallocatedBuffer: Slot: {}, Magic: 0x{:X}, Width: {}, Height: {}, Stride: {}, Format: {}, Usage: {}, Index: {}, ID: {}, Handle: {}, Offset: 0x{:X}, Block Height: {}, Size: 0x{:X}", data->slot, gbpBuffer->magic, gbpBuffer->width, gbpBuffer->height, gbpBuffer->stride, gbpBuffer->format, gbpBuffer->usage, gbpBuffer->index,gbpBuffer->nvmapId, gbpBuffer->nvmapHandle, gbpBuffer->offset, (1U << gbpBuffer->blockHeightLog2), gbpBuffer->size);
}
void BufferQueue::FreeBuffer(u32 slotNo) {

View File

@ -90,8 +90,8 @@ namespace skyline::gpu {
* @brief This represents conditions for the completion of an asynchronous graphics operation
*/
struct Fence {
u32 syncptId;
u32 syncptValue;
u32 syncptId; //!< The ID of the syncpoint
u32 syncptValue; //!< The value of the syncpoint
};
/**
@ -101,6 +101,7 @@ namespace skyline::gpu {
public:
const DeviceState &state; //!< The state of the device
u32 slot; //!< The slot the buffer is in
u32 bpp; //!< The amount of bytes per pixel
Resolution resolution; //!< The resolution of this buffer
GbpBuffer gbpBuffer; //!< The information about the underlying buffer
BufferStatus status{BufferStatus::Free}; //!< The status of this buffer