Address CR Comments + Increase minSdkVersion to 29 (Android 10)

This commit is contained in:
PixelyIon
2021-07-10 19:08:18 +05:30
committed by ◱ Mark
parent 19be9f4b66
commit ec1da1b3f5
17 changed files with 165 additions and 176 deletions

View File

@ -5,7 +5,7 @@
#include "command_scheduler.h"
namespace skyline::gpu {
CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool) : active(true), device(device), commandBuffer(device, commandBuffer, pool), fence(device, vk::FenceCreateInfo{}), cycle(std::make_shared<FenceCycle>(device, *fence)) {}
CommandScheduler::CommandBufferSlot::CommandBufferSlot(vk::raii::Device &device, vk::CommandBuffer commandBuffer, vk::raii::CommandPool &pool) : device(device), commandBuffer(device, commandBuffer, pool), fence(device, vk::FenceCreateInfo{}), cycle(std::make_shared<FenceCycle>(device, *fence)) {}
bool CommandScheduler::CommandBufferSlot::AllocateIfFree(CommandScheduler::CommandBufferSlot &slot) {
if (slot.active.test_and_set(std::memory_order_acq_rel)) {
@ -44,7 +44,7 @@ namespace skyline::gpu {
}
void CommandScheduler::SubmitCommandBuffer(const vk::raii::CommandBuffer &commandBuffer, vk::Fence fence) {
std::lock_guard lock(gpu.queueMutex);
std::scoped_lock lock(gpu.queueMutex);
gpu.vkQueue.submit(vk::SubmitInfo{
.commandBufferCount = 1,
.pCommandBuffers = &*commandBuffer,

View File

@ -15,7 +15,7 @@ namespace skyline::gpu {
* @brief A wrapper around a command buffer which tracks its state to avoid concurrent usage
*/
struct CommandBufferSlot {
std::atomic_flag active; //!< If the command buffer is currently being recorded to
std::atomic_flag active{true}; //!< If the command buffer is currently being recorded to
const vk::raii::Device &device;
vk::raii::CommandBuffer commandBuffer;
vk::raii::Fence fence; //!< A fence used for tracking all submits of a buffer

View File

@ -116,10 +116,9 @@ namespace skyline::gpu {
if (!signalled.test(std::memory_order_consume)) {
auto it{dependencies.begin()}, next{std::next(it)};
if (it != dependencies.end()) {
while (next != dependencies.end()) {
for (; next != dependencies.end(); next++) {
(*it)->next = *next;
it = next;
next = std::next(next);
}
}
AttachObject(*dependencies.begin());

View File

@ -4,6 +4,8 @@
#pragma once
/* A collection of various types from AOSP that allow us to access private APIs for Native Window which we utilize for emulating the guest SF more accurately */
/**
* @url https://cs.android.com/android/platform/superproject/+/android11-release:frameworks/native/libs/nativebase/include/nativebase/nativebase.h;l=29;drc=cb496acbe593326e8d5d563847067d02b2df40ec
*/

View File

@ -38,19 +38,22 @@ namespace skyline::gpu {
}
}
void PresentationEngine::ChoreographerCallback(long frameTimeNanos, PresentationEngine *engine) {
u64 cycleLength{frameTimeNanos - engine->lastChoreographerTime};
if (std::abs(static_cast<i64>(cycleLength - engine->refreshCycleDuration)) > (constant::NsInMillisecond / 2)) {
void PresentationEngine::ChoreographerCallback(int64_t frameTimeNanos, PresentationEngine *engine) {
// If the duration of this cycle deviates by more than ±0.5ms from the current refresh cycle duration then we reevaluate it
i64 cycleLength{frameTimeNanos - engine->lastChoreographerTime};
if (std::abs(cycleLength - engine->refreshCycleDuration) > (constant::NsInMillisecond / 2)) {
if (engine->window)
engine->window->perform(engine->window, NATIVE_WINDOW_GET_REFRESH_CYCLE_DURATION, &engine->refreshCycleDuration);
else
engine->refreshCycleDuration = cycleLength;
}
// Record the current cycle's timestamp and signal the V-Sync event to notify the game that a frame has been displayed
engine->lastChoreographerTime = frameTimeNanos;
engine->vsyncEvent->Signal();
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), engine);
// Post the frame callback to be triggered on the next display refresh
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), engine);
}
void PresentationEngine::ChoreographerThread() {
@ -58,7 +61,7 @@ namespace skyline::gpu {
try {
signal::SetSignalHandler({SIGINT, SIGILL, SIGTRAP, SIGBUS, SIGFPE, SIGSEGV}, signal::ExceptionalSignalHandler);
choreographerLooper = ALooper_prepare(0);
AChoreographer_postFrameCallback(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), this);
AChoreographer_postFrameCallback64(AChoreographer_getInstance(), reinterpret_cast<AChoreographer_frameCallback>(&ChoreographerCallback), this);
ALooper_pollAll(-1, nullptr, nullptr, nullptr); // Will block and process callbacks till ALooper_wake() is called
} catch (const signal::SignalException &e) {
state.logger->Error("{}\nStack Trace:{}", e.what(), state.loader->GetStackTrace(e.frames));
@ -180,12 +183,15 @@ namespace skyline::gpu {
if (window->common.version != sizeof(ANativeWindow))
throw exception("ANativeWindow* has unexpected version: {} instead of {}", window->common.version, sizeof(ANativeWindow));
if (windowCrop)
window->perform(window, NATIVE_WINDOW_SET_CROP, &windowCrop);
if (windowScalingMode != NativeWindowScalingMode::ScaleToWindow)
window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast<i32>(windowScalingMode));
int result;
if (windowCrop && (result = window->perform(window, NATIVE_WINDOW_SET_CROP, &windowCrop)))
throw exception("Setting the layer crop to ({}-{})x({}-{}) failed with {}", windowCrop.left, windowCrop.right, windowCrop.top, windowCrop.bottom, result);
window->perform(window, NATIVE_WINDOW_ENABLE_FRAME_TIMESTAMPS, true);
if (windowScalingMode != NativeWindowScalingMode::ScaleToWindow && (result = window->perform(window, NATIVE_WINDOW_SET_SCALING_MODE, static_cast<i32>(windowScalingMode))))
throw exception("Setting the layer scaling mode to '{}' failed with {}", ToString(windowScalingMode), result);
if ((result = window->perform(window, NATIVE_WINDOW_ENABLE_FRAME_TIMESTAMPS, true)))
throw exception("Enabling frame timestamps failed with {}", result);
surfaceCondition.notify_all();
} else {
@ -272,15 +278,19 @@ namespace skyline::gpu {
}
if (frameTimestamp) {
auto now{util::GetTimeNs()};
i64 now{static_cast<i64>(util::GetTimeNs())};
auto sampleWeight{swapInterval ? constant::NsInSecond / (refreshCycleDuration * swapInterval) : 10}; //!< The weight of each sample in calculating the average, we arbitrarily average 10 samples for unlocked FPS
u64 currentFrametime{now - frameTimestamp};
averageFrametimeNs = (((sampleWeight - 1) * averageFrametimeNs) / sampleWeight) + (currentFrametime / sampleWeight);
auto weightedAverage{[](auto weight, auto previousAverage, auto current) {
return (((weight - 1) * previousAverage) + current) / weight;
}}; //!< Modified moving average (https://en.wikipedia.org/wiki/Moving_average#Modified_moving_average)
i64 currentFrametime{now - frameTimestamp};
averageFrametimeNs = weightedAverage(sampleWeight, averageFrametimeNs, currentFrametime);
AverageFrametimeMs = static_cast<jfloat>(averageFrametimeNs) / constant::NsInMillisecond;
i64 currentFrametimeDeviation{std::abs(static_cast<i64>(averageFrametimeNs - currentFrametime))};
averageFrametimeDeviationNs = (((sampleWeight - 1) * averageFrametimeDeviationNs) / sampleWeight) + (currentFrametimeDeviation / sampleWeight);
i64 currentFrametimeDeviation{std::abs(averageFrametimeNs - currentFrametime)};
averageFrametimeDeviationNs = weightedAverage(sampleWeight, averageFrametimeDeviationNs, currentFrametimeDeviation);
AverageFrametimeDeviationMs = static_cast<jfloat>(averageFrametimeDeviationNs) / constant::NsInMillisecond;
Fps = std::round(static_cast<float>(constant::NsInSecond) / averageFrametimeNs);

View File

@ -21,9 +21,9 @@ namespace skyline::gpu {
GPU &gpu;
std::mutex mutex; //!< Synchronizes access to the surface objects
std::condition_variable surfaceCondition; //!< Allows us to efficiently wait for Vulkan surface to be initialized
std::condition_variable surfaceCondition; //!< Signalled when a valid Vulkan surface is available
jobject jSurface{}; //!< The Java Surface object backing the ANativeWindow
ANativeWindow *window{}; //!< A pointer to an Android Native Window which is the surface we draw to
ANativeWindow *window{}; //!< The backing Android Native Window for the surface we draw to, we keep this around to access private APIs not exposed via Vulkan
service::hosbinder::AndroidRect windowCrop{}; //!< A rectangle with the bounds of the current crop performed on the image prior to presentation
service::hosbinder::NativeWindowScalingMode windowScalingMode{service::hosbinder::NativeWindowScalingMode::ScaleToWindow}; //!< The mode in which the cropped image is scaled up to the surface
service::hosbinder::NativeWindowTransform windowTransform{}; //!< The transformation performed on the image prior to presentation
@ -40,23 +40,23 @@ namespace skyline::gpu {
static constexpr size_t MaxSwapchainImageCount{6}; //!< The maximum amount of swapchain textures, this affects the amount of images that can be in the swapchain
std::array<std::shared_ptr<Texture>, MaxSwapchainImageCount> images; //!< All the swapchain textures in the same order as supplied by the host swapchain
u64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
u64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds
u64 averageFrametimeDeviationNs{}; //!< The average deviation of frametimes in nanoseconds
i64 frameTimestamp{}; //!< The timestamp of the last frame being shown in nanoseconds
i64 averageFrametimeNs{}; //!< The average time between frames in nanoseconds
i64 averageFrametimeDeviationNs{}; //!< The average deviation of frametimes in nanoseconds
perfetto::Track presentationTrack; //!< Perfetto track used for presentation events
std::thread choreographerThread; //!< A thread for signalling the V-Sync event using AChoreographer
ALooper *choreographerLooper{}; //!< The looper object associated with the Choreographer thread
u64 lastChoreographerTime{}; //!< The timestamp of the last invocation of Choreographer::doFrame
u64 refreshCycleDuration{}; //!< The duration of a single refresh cycle for the display in nanoseconds
std::thread choreographerThread; //!< A thread for signalling the V-Sync event and measure the refresh cycle duration using AChoreographer
ALooper *choreographerLooper{};
i64 lastChoreographerTime{}; //!< The timestamp of the last invocation of Choreographer::doFrame
i64 refreshCycleDuration{}; //!< The duration of a single refresh cycle for the display in nanoseconds
/**
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_framecallback
* @url https://developer.android.com/ndk/reference/group/choreographer#achoreographer_postframecallback64
*/
static void ChoreographerCallback(long frameTimeNanos, PresentationEngine *engine);
static void ChoreographerCallback(int64_t frameTimeNanos, PresentationEngine *engine);
/**
* @brief The entry point for the the Choreographer thread, the function runs ALooper on the thread
* @brief The entry point for the Choreographer thread, sets up the AChoreographer callback then runs ALooper on the thread
*/
void ChoreographerThread();

View File

@ -140,7 +140,7 @@ namespace skyline::gpu {
u8 *bufferData;
auto stagingBuffer{[&]() -> std::shared_ptr<memory::StagingBuffer> {
if (tiling == vk::ImageTiling::eOptimal || !std::holds_alternative<memory::Image>(backing)) {
// We need a staging buffer for all optimal copies (Since we aren't aware of the host optimal layout) and linear textures which we cannot map on the CPU since we do not have access to their backing VkDeviceMemory
// We need a staging buffer for all optimal copies (since we aren't aware of the host optimal layout) and linear textures which we cannot map on the CPU since we do not have access to their backing VkDeviceMemory
auto stagingBuffer{gpu.memory.AllocateStagingBuffer(size)};
bufferData = stagingBuffer->data();
return stagingBuffer;
@ -156,19 +156,19 @@ namespace skyline::gpu {
if (guest->tileMode == texture::TileMode::Block) {
// Reference on Block-linear tiling: https://gist.github.com/PixelyIon/d9c35050af0ef5690566ca9f0965bc32
constexpr u8 sectorWidth{16}; // The width of a sector in bytes
constexpr u8 sectorHeight{2}; // The height of a sector in lines
constexpr u8 gobWidth{64}; // The width of a GOB in bytes
constexpr u8 gobHeight{8}; // The height of a GOB in lines
constexpr u8 SectorWidth{16}; // The width of a sector in bytes
constexpr u8 SectorHeight{2}; // The height of a sector in lines
constexpr u8 GobWidth{64}; // The width of a GOB in bytes
constexpr u8 GobHeight{8}; // The height of a GOB in lines
auto blockHeight{guest->tileConfig.blockHeight}; // The height of the blocks in GOBs
auto robHeight{gobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
auto robHeight{GobHeight * blockHeight}; // The height of a single ROB (Row of Blocks) in lines
auto surfaceHeight{dimensions.height / guest->format.blockHeight}; // The height of the surface in lines
auto surfaceHeightRobs{util::AlignUp(surfaceHeight, robHeight) / robHeight}; // The height of the surface in ROBs (Row Of Blocks)
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, gobWidth)}; // The width of a ROB in bytes
auto robWidthBlocks{robWidthBytes / gobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
auto robWidthBytes{util::AlignUp((guest->tileConfig.surfaceWidth / guest->format.blockWidth) * guest->format.bpb, GobWidth)}; // The width of a ROB in bytes
auto robWidthBlocks{robWidthBytes / GobWidth}; // The width of a ROB in blocks (and GOBs because block width == 1 on the Tegra X1)
auto robBytes{robWidthBytes * robHeight}; // The size of a ROB in bytes
auto gobYOffset{robWidthBytes * gobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
auto gobYOffset{robWidthBytes * GobHeight}; // The offset of the next Y-axis GOB from the current one in linear space
auto inputSector{pointer}; // The address of the input sector
auto outputRob{bufferData}; // The address of the output block
@ -178,22 +178,22 @@ namespace skyline::gpu {
for (u32 block{}; block < robWidthBlocks; block++) { // Every ROB contains `surfaceWidthBlocks` Blocks
auto outputGob{outputBlock}; // We iterate through a GOB independently of the block
for (u32 gobY{}; gobY < blockHeight; gobY++) { // Every Block contains `blockHeight` Y-axis GOBs
for (u32 index{}; index < sectorWidth * sectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
for (u32 index{}; index < SectorWidth * SectorHeight; index++) { // Every Y-axis GOB contains `sectorWidth * sectorHeight` sectors
u32 xT{((index << 3) & 0b10000) | ((index << 1) & 0b100000)}; // Morton-Swizzle on the X-axis
u32 yT{((index >> 1) & 0b110) | (index & 0b1)}; // Morton-Swizzle on the Y-axis
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, sectorWidth);
inputSector += sectorWidth; // `sectorWidth` bytes are of sequential image data
std::memcpy(outputGob + (yT * robWidthBytes) + xT, inputSector, SectorWidth);
inputSector += SectorWidth; // `sectorWidth` bytes are of sequential image data
}
outputGob += gobYOffset; // Increment the output GOB to the next Y-axis GOB
}
inputSector += paddingY; // Increment the input sector to the next sector
outputBlock += gobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
outputBlock += GobWidth; // Increment the output block to the next block (As Block Width = 1 GOB Width)
}
outputRob += robBytes; // Increment the output block to the next ROB
y += robHeight; // Increment the Y position to the next ROB
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / gobHeight)); // Calculate the amount of Y GOBs which aren't padding
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (sectorWidth * sectorWidth * sectorHeight); // Calculate the amount of padding between contiguous sectors
blockHeight = static_cast<u8>(std::min(static_cast<u32>(blockHeight), (surfaceHeight - y) / GobHeight)); // Calculate the amount of Y GOBs which aren't padding
paddingY = (guest->tileConfig.blockHeight - blockHeight) * (SectorWidth * SectorWidth * SectorHeight); // Calculate the amount of padding between contiguous sectors
}
} else if (guest->tileMode == texture::TileMode::Pitch) {
auto sizeLine{guest->format.GetSize(dimensions.width, 1)}; // The size of a single line of pixel data

View File

@ -264,6 +264,7 @@ namespace skyline::gpu {
/**
* @brief Acquires an exclusive lock on the texture for the calling thread
* @note Naming is in accordance with the BasicLockable named requirement
*/
void lock() {
mutex.lock();
@ -271,6 +272,7 @@ namespace skyline::gpu {
/**
* @brief Relinquishes an existing lock on the texture by the calling thread
* @note Naming is in accordance with the BasicLockable named requirement
*/
void unlock() {
mutex.unlock();
@ -278,6 +280,7 @@ namespace skyline::gpu {
/**
* @brief Attempts to acquire an exclusive lock but returns immediately if it's captured by another thread
* @note Naming is in accordance with the Lockable named requirement
*/
bool try_lock() {
return mutex.try_lock();