From e8bd9aed8bf0f60455d0ae6a8f6f3abf92dd8305 Mon Sep 17 00:00:00 2001 From: Markus Wick Date: Wed, 7 Apr 2021 11:41:31 +0200 Subject: [PATCH] video_core: Use a CV for blocking commands. There is no need for a busy loop here. Let's just use a condition variable to save some power. --- src/video_core/gpu_thread.cpp | 45 +++++++++++++++++++++-------------- src/video_core/gpu_thread.h | 11 ++++----- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 9488bf544..7addfbc7b 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -56,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, } else if (const auto* invalidate = std::get_if(&next.data)) { rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); } else if (std::holds_alternative(next.data)) { - return; + ASSERT(state.is_running == false); } else { UNREACHABLE(); } state.signaled_fence.store(next.fence); + if (next.block) { + // We have to lock the write_lock to ensure that the condition_variable wait not get a + // race between the check and the lock itself. + std::lock_guard lk(state.write_lock); + state.cv.notify_all(); + } } } @@ -105,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { case Settings::GPUAccuracy::Extreme: { auto& gpu = system.GPU(); u64 fence = gpu.RequestFlush(addr, size); - PushCommand(GPUTickCommand()); - while (fence > gpu.CurrentFlushRequestFence()) { - } + PushCommand(GPUTickCommand(), true); + ASSERT(fence <= gpu.CurrentFlushRequestFence()); break; } default: @@ -124,18 +129,16 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { rasterizer->OnCPUWrite(addr, size); } -void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) && - state.is_running) { - } -} - void ThreadManager::ShutDown() { if (!state.is_running) { return; } - state.is_running = false; + { + std::lock_guard lk(state.write_lock); + state.is_running = false; + state.cv.notify_all(); + } if (!thread.joinable()) { return; @@ -150,15 +153,21 @@ void ThreadManager::OnCommandListEnd() { PushCommand(OnCommandListEndCommand()); } -u64 ThreadManager::PushCommand(CommandData&& command_data) { - std::unique_lock lk(state.write_lock); - const u64 fence{++state.last_fence}; - state.queue.Push(CommandDataContainer(std::move(command_data), fence)); - +u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { if (!is_async) { // In synchronous GPU mode, block the caller until the command has executed - lk.unlock(); - WaitIdle(); + block = true; + } + + std::unique_lock lk(state.write_lock); + const u64 fence{++state.last_fence}; + state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); + + if (block) { + state.cv.wait(lk, [this, fence] { + return fence <= state.signaled_fence.load(std::memory_order_relaxed) || + !state.is_running; + }); } return fence; diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cb901c22a..11a648f38 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -90,11 +90,12 @@ using CommandData = struct CommandDataContainer { CommandDataContainer() = default; - explicit CommandDataContainer(CommandData&& data_, u64 next_fence_) - : data{std::move(data_)}, fence{next_fence_} {} + explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_) + : data{std::move(data_)}, fence{next_fence_}, block(block_) {} CommandData data; u64 fence{}; + bool block{}; }; /// Struct used to synchronize the GPU thread @@ -106,6 +107,7 @@ struct SynchState final { CommandQueue queue; u64 last_fence{}; std::atomic signaled_fence{}; + std::condition_variable cv; }; /// Class used to manage the GPU thread @@ -140,10 +142,7 @@ public: private: /// Pushes a command to be executed by the GPU thread - u64 PushCommand(CommandData&& command_data); - - // Wait until the gpu thread is idle. - void WaitIdle() const; + u64 PushCommand(CommandData&& command_data, bool block = false); Core::System& system; const bool is_async;