mirror of
https://github.com/Takiiiiiiii/strato.git
synced 2025-07-17 08:46:39 +00:00
Address CR Comments + Increase minSdkVersion
to 29 (Android 10)
This commit is contained in:
@ -5,7 +5,7 @@
|
||||
#include "command_scheduler.h"
|
||||
|
||||
namespace skyline::gpu {
|
||||
CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool) : active(true), device(device), commandBuffer(device, commandBuffer, pool), fence(device, vk::FenceCreateInfo{}), cycle(std::make_shared<FenceCycle>(device, *fence)) {}
|
||||
CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool) : device(device), commandBuffer(device, commandBuffer, pool), fence(device, vk::FenceCreateInfo{}), cycle(std::make_shared<FenceCycle>(device, *fence)) {}
|
||||
|
||||
bool CommandScheduler::CommandBufferSlot::AllocateIfFree(CommandScheduler::CommandBufferSlot &slot) {
|
||||
if (slot.active.test_and_set(std::memory_order_acq_rel)) {
|
||||
@ -44,7 +44,7 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) {
|
||||
std::lock_guard lock(gpu.queueMutex);
|
||||
std::scoped_lock lock(gpu.queueMutex);
|
||||
gpu.vkQueue.submit(vk::SubmitInfo{
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &*commandBuffer,
|
||||
|
@ -15,7 +15,7 @@ namespace skyline::gpu {
|
||||
* @brief A wrapper around a command buffer which tracks its state to avoid concurrent usage
|
||||
*/
|
||||
struct CommandBufferSlot {
|
||||
std::atomic_flag active; //!< If the command buffer is currently being recorded to
|
||||
std::atomic_flag active{true}; //!< If the command buffer is currently being recorded to
|
||||
const vk::raii::Device &device;
|
||||
vk::raii::CommandBuffer commandBuffer;
|
||||
vk::raii::Fence fence; //!< A fence used for tracking all submits of a buffer
|
||||
|
@ -116,10 +116,9 @@ namespace skyline::gpu {
|
||||
if (!signalled.test(std::memory_order_consume)) {
|
||||
auto it{dependencies.begin()}, next{std::next(it)};
|
||||
if (it != dependencies.end()) {
|
||||
while (next != dependencies.end()) {
|
||||
for (; next != dependencies.end(); next++) {
|
||||
(*it)->next = *next;
|
||||
it = next;
|
||||
next = std::next(next);
|
||||
}
|
||||
}
|
||||
AttachObject(*dependencies.begin());
|
||||
|
@ -4,6 +4,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
/* A collection of various types from AOSP that allow us to access private APIs for Native Window which we utilize for emulating the guest SF more accurately */
|
||||
|
||||
/**
|
||||
* @url https://cs.android.com/android/platform/superproject/+/android11-release:frameworks/native/libs/nativebase/include/nativebase/nativebase.h;l=29;drc=cb496acbe593326e8d5d563847067d02b2df40ec
|
||||
*/
|
||||
|
@ -38,19 +38,22 @@ namespace skyline::gpu {
|
||||
}
|
||||
}
|
||||
|
||||
void PresentationEngine::ChoreographerCallback(long frameTimeNanos, PresentationEngine *engine) {
|
||||
u64 cycleLength{frameTimeNanos - engine->lastChoreographerTime};
|
||||
if (std::abs(static_cast<i64>(cycleLength - engine->refreshCycleDuration)) > (constant::NsInMillisecond / 2)) {
|
||||
void PresentationEngine::ChoreographerCallback(int64_t frameTimeNanos, PresentationEngine *engine) {
|
||||
// If the duration of this cycle deviates by more than ±0.5ms from the current refresh cycle duration then we reevaluate it
|
||||
i64 cycleLength{frameTimeNanos - engine->lastChoreographerTime};
|
||||
if (std::abs(cycleLength - engine->refreshCycleDuration) > (constant::NsInMillisecond / 2)) {
|
||||
if (engine->window)
|
||||
engine->window->perform(engine->window, NATIVE_WINDOW_GET_REFRESH_CYCLE_DURATION, &engine->refreshCycleDuration);
|
||||
else
|
||||
engine->refreshCycleDuration = cycleLength;
|
||||
}
|
||||
|
||||
// Record the current cycle's timestamp and signal the V-Sync event to notify the game that a frame has been displayed
|
||||
engine->lastChoreographerTime = frameTimeNanos;
|
||||
engine->vsyncEvent->Signal();
|
||||
|
||||
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), engine);
|
||||
// Post the frame callback to be triggered on the next display refresh
|
||||
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), engine);
|
||||
}
|
||||
|
||||
void PresentationEngine::ChoreographerThread() {
|
||||
@ -58,7 +61,7 @@ namespace skyline::gpu {
|
||||
try {
|
||||
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
|
||||
choreographerLooper = ALooper_prepare(0);
|
||||
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), this);
|
||||
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), this);
|
||||
ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called
|
||||
} catch (const signal::SignalException &e) {
|
||||
state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
|
||||
@ -180,12 +183,15 @@ namespace skyline::gpu {
|
||||
if (window->common.version != sizeof(ANativeWindow))
|
||||
throw exception("ANativeWindow* has unexpected version: {} instead of {}", window->common.version, sizeof(ANativeWindow));
|
||||
|
||||
if (windowCrop)
|
||||
window->perform(window, NATIVE_WINDOW_SET_CROP, &windowCrop);
|
||||
if (windowScalingMode != NativeWindowScalingMode::ScaleToWindow)
|
||||
window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast<i32>(windowScalingMode));
|
||||
int result;
|
||||
if (windowCrop && (result = window->perform(window, NATIVE_WINDOW_SET_CROP, &windowCrop)))
|
||||
throw exception("Setting the layer crop to ({}-{})x({}-{}) failed with {}", windowCrop.left, windowCrop.right, windowCrop.top, windowCrop.bottom, result);
|
||||
|
||||
window->perform(window, NATIVE_WINDOW_ENABLE_FRAME_TIMESTAMPS, true);
|
||||
if (windowScalingMode != NativeWindowScalingMode::ScaleToWindow && (result = window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast<i32>(windowScalingMode))))
|
||||
throw exception("Setting the layer scaling mode to '{}' failed with {}", ToString(windowScalingMode), result);
|
||||
|
||||
if ((result = window->perform(window, NATIVE_WINDOW_ENABLE_FRAME_TIMESTAMPS, true)))
|
||||
throw exception("Enabling frame timestamps failed with {}", result);
|
||||
|
||||
surfaceCondition.notify_all();
|
||||
} else {
|
||||
@ -272,15 +278,19 @@ namespace skyline::gpu {
|
||||
}
|
||||
|
||||
if (frameTimestamp) {
|
||||
auto now{util::GetTimeNs()};
|
||||
i64 now{static_cast<i64>(util::GetTimeNs())};
|
||||
auto sampleWeight{swapInterval ? constant::NsInSecond / (refreshCycleDuration * swapInterval) : 10}; //!< The weight of each sample in calculating the average, we arbitrarily average 10 samples for unlocked FPS
|
||||
|
||||
u64 currentFrametime{now - frameTimestamp};
|
||||
averageFrametimeNs = (((sampleWeight - 1) * averageFrametimeNs) / sampleWeight) + (currentFrametime / sampleWeight);
|
||||
auto weightedAverage{[](auto weight, auto previousAverage, auto current) {
|
||||
return (((weight - 1) * previousAverage) + current) / weight;
|
||||
}}; //!< Modified moving average (https://en.wikipedia.org/wiki/Moving_average#Modified_moving_average)
|
||||
|
||||
i64 currentFrametime{now - frameTimestamp};
|
||||
averageFrametimeNs = weightedAverage(sampleWeight, averageFrametimeNs, currentFrametime);
|
||||
AverageFrametimeMs = static_cast<jfloat>(averageFrametimeNs) / constant::NsInMillisecond;
|
||||
|
||||
i64 currentFrametimeDeviation{std::abs(static_cast<i64>(averageFrametimeNs - currentFrametime))};
|
||||
averageFrametimeDeviationNs = (((sampleWeight - 1) * averageFrametimeDeviationNs) / sampleWeight) + (currentFrametimeDeviation / sampleWeight);
|
||||
i64 currentFrametimeDeviation{std::abs(averageFrametimeNs - currentFrametime)};
|
||||
averageFrametimeDeviationNs = weightedAverage(sampleWeight, averageFrametimeDeviationNs, currentFrametimeDeviation);
|
||||
AverageFrametimeDeviationMs = static_cast<jfloat>(averageFrametimeDeviationNs) / constant::NsInMillisecond;
|
||||
|
||||
Fps = std::round(static_cast<float>(constant::NsInSecond) / averageFrametimeNs);
|
||||
|
@ -21,9 +21,9 @@ namespace skyline::gpu {
|
||||
GPU &gpu;
|
||||
|
||||
std::mutex mutex; //!< Synchronizes access to the surface objects
|
||||
std::condition_variable surfaceCondition; //!< Allows us to efficiently wait for Vulkan surface to be initialized
|
||||
std::condition_variable surfaceCondition; //!< Signalled when a valid Vulkan surface is available
|
||||
jobject jSurface{}; //!< The Java Surface object backing the ANativeWindow
|
||||
ANativeWindow *window{}; //!< A pointer to an Android Native Window which is the surface we draw to
|
||||
ANativeWindow *window{}; //!< The backing Android Native Window for the surface we draw to, we keep this around to access private APIs not exposed via Vulkan
|
||||
service::hosbinder::AndroidRect windowCrop{}; //!< A rectangle with the bounds of the current crop performed on the image prior to presentation
|
||||
service::hosbinder::NativeWindowScalingMode windowScalingMode{service::hosbinder::NativeWindowScalingMode::ScaleToWindow}; //!< The mode in which the cropped image is scaled up to the surface
|
||||
service::hosbinder::NativeWindowTransform windowTransform{}; //!< The transformation performed on the image prior to presentation
|
||||
@ -40,23 +40,23 @@ namespace skyline::gpu {
|
||||
static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain
|
||||
std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain
|
||||
|
||||
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
|
||||
u64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds
|
||||
u64 averageFrametimeDeviationNs{}; //!< The average deviation of frametimes in nanoseconds
|
||||
i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
|
||||
i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds
|
||||
i64 averageFrametimeDeviationNs{}; //!< The average deviation of frametimes in nanoseconds
|
||||
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
|
||||
|
||||
std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer
|
||||
ALooper *choreographerLooper{}; //!< The looper object associated with the Choreographer thread
|
||||
u64 lastChoreographerTime{}; //!< The timestamp of the last invocation of Choreographer::doFrame
|
||||
u64 refreshCycleDuration{}; //!< The duration of a single refresh cycle for the display in nanoseconds
|
||||
std::thread choreographerThread; //!< A thread for signalling the V-Sync event and measure the refresh cycle duration using AChoreographer
|
||||
ALooper *choreographerLooper{};
|
||||
i64 lastChoreographerTime{}; //!< The timestamp of the last invocation of Choreographer::doFrame
|
||||
i64 refreshCycleDuration{}; //!< The duration of a single refresh cycle for the display in nanoseconds
|
||||
|
||||
/**
|
||||
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback
|
||||
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_postframecallback64
|
||||
*/
|
||||
static void ChoreographerCallback(long frameTimeNanos, PresentationEngine *engine);
|
||||
static void ChoreographerCallback(int64_t frameTimeNanos, PresentationEngine *engine);
|
||||
|
||||
/**
|
||||
* @brief The entry point for the Choreographer thread, the function runs ALooper on the thread
|
||||
* @brief The entry point for the Choreographer thread, sets up the AChoreographer callback then runs ALooper on the thread
|
||||
*/
|
||||
void ChoreographerThread();
|
||||
|
||||
|
@ -140,7 +140,7 @@ namespace skyline::gpu {
|
||||
u8 *bufferData;
|
||||
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
|
||||
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
|
||||
// We need a staging buffer for all optimal copies (Since we aren't aware of the host optimal layout) and linear textures which we cannot map on the CPU since we do not have access to their backing VkDeviceMemory
|
||||
// We need a staging buffer for all optimal copies (since we aren't aware of the host optimal layout) and linear textures which we cannot map on the CPU since we do not have access to their backing VkDeviceMemory
|
||||
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
|
||||
bufferData = stagingBuffer->data();
|
||||
return stagingBuffer;
|
||||
@ -156,19 +156,19 @@ namespace skyline::gpu {
|
||||
|
||||
if (guest->tileMode == texture::TileMode::Block) {
|
||||
// Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32
|
||||
constexpr u8 sectorWidth{16}; // The width of a sector in bytes
|
||||
constexpr u8 sectorHeight{2}; // The height of a sector in lines
|
||||
constexpr u8 gobWidth{64}; // The width of a GOB in bytes
|
||||
constexpr u8 gobHeight{8}; // The height of a GOB in lines
|
||||
constexpr u8 SectorWidth{16}; // The width of a sector in bytes
|
||||
constexpr u8 SectorHeight{2}; // The height of a sector in lines
|
||||
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
|
||||
constexpr u8 GobHeight{8}; // The height of a GOB in lines
|
||||
|
||||
auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs
|
||||
auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
|
||||
auto robHeight{GobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
|
||||
auto surfaceHeight{dimensions.height / guest->format.blockHeight}; // The height of the surface in lines
|
||||
auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks)
|
||||
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, gobWidth)}; // The width of a ROB in bytes
|
||||
auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
|
||||
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, GobWidth)}; // The width of a ROB in bytes
|
||||
auto robWidthBlocks{robWidthBytes / GobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
|
||||
auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes
|
||||
auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
|
||||
auto gobYOffset{robWidthBytes * GobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
|
||||
|
||||
auto inputSector{pointer}; // The address of the input sector
|
||||
auto outputRob{bufferData}; // The address of the output block
|
||||
@ -178,22 +178,22 @@ namespace skyline::gpu {
|
||||
for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks
|
||||
auto outputGob{outputBlock}; // We iterate through a GOB independently of the block
|
||||
for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
|
||||
for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
|
||||
for (u32 index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
|
||||
u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis
|
||||
u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
|
||||
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth);
|
||||
inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data
|
||||
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, SectorWidth);
|
||||
inputSector += SectorWidth; // `sectorWidth` bytes are of sequential image data
|
||||
}
|
||||
outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB
|
||||
}
|
||||
inputSector += paddingY; // Increment the input sector to the next sector
|
||||
outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
|
||||
outputBlock += GobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
|
||||
}
|
||||
outputRob += robBytes; // Increment the output block to the next ROB
|
||||
|
||||
y += robHeight; // Increment the Y position to the next ROB
|
||||
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding
|
||||
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors
|
||||
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / GobHeight)); // Calculate the amount of Y GOBs which aren't padding
|
||||
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors
|
||||
}
|
||||
} else if (guest->tileMode == texture::TileMode::Pitch) {
|
||||
auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data
|
||||
|
@ -264,6 +264,7 @@ namespace skyline::gpu {
|
||||
|
||||
/**
|
||||
* @brief Acquires an exclusive lock on the texture for the calling thread
|
||||
* @note Naming is in accordance with the BasicLockable named requirement
|
||||
*/
|
||||
void lock() {
|
||||
mutex.lock();
|
||||
@ -271,6 +272,7 @@ namespace skyline::gpu {
|
||||
|
||||
/**
|
||||
* @brief Relinquishes an existing lock on the texture by the calling thread
|
||||
* @note Naming is in accordance with the BasicLockable named requirement
|
||||
*/
|
||||
void unlock() {
|
||||
mutex.unlock();
|
||||
@ -278,6 +280,7 @@ namespace skyline::gpu {
|
||||
|
||||
/**
|
||||
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
|
||||
* @note Naming is in accordance with the Lockable named requirement
|
||||
*/
|
||||
bool try_lock() {
|
||||
return mutex.try_lock();
|
||||
|
Reference in New Issue
Block a user