From c2ddda2f5119a7f9dc44d79f340fd7c63a68af7d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 20:18:36 -0400 Subject: [PATCH 1/3] threadsafe_queue: Add std::stop_token overload to PopWait Useful for jthreads which make use of the threadsafe queues. --- src/common/threadsafe_queue.h | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h index 8430b9778..2c8c2b90e 100644 --- a/src/common/threadsafe_queue.h +++ b/src/common/threadsafe_queue.h @@ -14,7 +14,7 @@ #include namespace Common { -template +template class SPSCQueue { public: SPSCQueue() { @@ -84,7 +84,7 @@ public: void Wait() { if (Empty()) { std::unique_lock lock{cv_mutex}; - cv.wait(lock, [this]() { return !Empty(); }); + cv.wait(lock, [this] { return !Empty(); }); } } @@ -95,6 +95,19 @@ public: return t; } + T PopWait(std::stop_token stop_token) { + if (Empty()) { + std::unique_lock lock{cv_mutex}; + cv.wait(lock, stop_token, [this] { return !Empty(); }); + } + if (stop_token.stop_requested()) { + return T{}; + } + T t; + Pop(t); + return t; + } + // not thread-safe void Clear() { size.store(0); @@ -123,13 +136,13 @@ private: ElementPtr* read_ptr; std::atomic_size_t size{0}; std::mutex cv_mutex; - std::condition_variable cv; + std::conditional_t cv; }; // a simple thread-safe, // single reader, multiple writer queue -template +template class MPSCQueue { public: [[nodiscard]] std::size_t Size() const { @@ -166,13 +179,17 @@ public: return spsc_queue.PopWait(); } + T PopWait(std::stop_token stop_token) { + return spsc_queue.PopWait(stop_token); + } + // not thread-safe void Clear() { spsc_queue.Clear(); } private: - SPSCQueue spsc_queue; + SPSCQueue spsc_queue; std::mutex write_lock; }; } // namespace Common From 877cd60b00a3f827062fdaff93183b52174ec134 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 20:32:54 -0400 Subject: [PATCH 2/3] gpu: Use std::jthread for async gpu thread --- src/core/core.cpp | 6 +--- src/video_core/gpu.cpp | 8 ----- src/video_core/gpu.h | 3 -- src/video_core/gpu_thread.cpp | 57 ++++++++--------------------------- src/video_core/gpu_thread.h | 13 +++----- 5 files changed, 18 insertions(+), 69 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index b13350f6e..54ebed2c1 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -305,10 +305,7 @@ struct System::Impl { is_powered_on = false; exit_lock = false; - if (gpu_core) { - gpu_core->ShutDown(); - } - + gpu_core.reset(); services.reset(); service_manager.reset(); cheat_engine.reset(); @@ -317,7 +314,6 @@ struct System::Impl { time_manager.Shutdown(); core_timing.Shutdown(); app_loader.reset(); - gpu_core.reset(); perf_stats.reset(); kernel.Shutdown(); memory.Reset(); diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index ff024f530..2ae3639b5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -531,14 +531,6 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const { interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } -void GPU::ShutDown() { - // Signal that threads should no longer block on syncpoint fences - shutting_down.store(true, std::memory_order_relaxed); - sync_cv.notify_all(); - - gpu_thread.ShutDown(); -} - void GPU::OnCommandListEnd() { if (is_async) { // This command only applies to asynchronous GPU mode diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index a8e98e51b..e6a02a71b 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -219,9 +219,6 @@ public: return *shader_notify; } - // Stops the GPU execution and waits for the GPU to finish working - void ShutDown(); - /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value); diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 46f642b19..9547f277a 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -17,9 +17,9 @@ namespace VideoCommon::GPUThread { /// Runs the GPU thread -static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, - Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher, - SynchState& state) { +static void RunThread(std::stop_token stop_token, Core::System& system, + VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, + Tegra::DmaPusher& dma_pusher, SynchState& state) { std::string name = "yuzu:GPU"; MicroProfileOnThreadCreate(name.c_str()); SCOPE_EXIT({ MicroProfileOnThreadExit(); }); @@ -28,20 +28,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, Common::SetCurrentThreadPriority(Common::ThreadPriority::High); system.RegisterHostThread(); - // Wait for first GPU command before acquiring the window context - state.queue.Wait(); - - // If emulation was stopped during disk shader loading, abort before trying to acquire context - if (!state.is_running) { - return; - } - auto current_context = context.Acquire(); VideoCore::RasterizerInterface* const rasterizer = renderer.ReadRasterizer(); - CommandDataContainer next; - while (state.is_running) { - next = state.queue.PopWait(); + while (!stop_token.stop_requested()) { + CommandDataContainer next = state.queue.PopWait(stop_token); + if (stop_token.stop_requested()) { + break; + } if (auto* submit_list = std::get_if(&next.data)) { dma_pusher.Push(std::move(submit_list->entries)); dma_pusher.DispatchCalls(); @@ -55,8 +49,6 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, rasterizer->FlushRegion(flush->addr, flush->size); } else if (const auto* invalidate = std::get_if(&next.data)) { rasterizer->OnCPUWrite(invalidate->addr, invalidate->size); - } else if (std::holds_alternative(next.data)) { - ASSERT(state.is_running == false); } else { UNREACHABLE(); } @@ -73,16 +65,14 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer, ThreadManager::ThreadManager(Core::System& system_, bool is_async_) : system{system_}, is_async{is_async_} {} -ThreadManager::~ThreadManager() { - ShutDown(); -} +ThreadManager::~ThreadManager() = default; void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher) { rasterizer = renderer.ReadRasterizer(); - thread = std::thread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), - std::ref(dma_pusher), std::ref(state)); + thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context), + std::ref(dma_pusher), std::ref(state)); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { @@ -117,26 +107,6 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { rasterizer->OnCPUWrite(addr, size); } -void ThreadManager::ShutDown() { - if (!state.is_running) { - return; - } - - { - std::lock_guard lk(state.write_lock); - state.is_running = false; - state.cv.notify_all(); - } - - if (!thread.joinable()) { - return; - } - - // Notify GPU thread that a shutdown is pending - PushCommand(EndProcessingCommand()); - thread.join(); -} - void ThreadManager::OnCommandListEnd() { PushCommand(OnCommandListEndCommand()); } @@ -152,9 +122,8 @@ u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) { state.queue.Push(CommandDataContainer(std::move(command_data), fence, block)); if (block) { - state.cv.wait(lk, [this, fence] { - return fence <= state.signaled_fence.load(std::memory_order_relaxed) || - !state.is_running; + state.cv.wait(lk, thread.get_stop_token(), [this, fence] { + return fence <= state.signaled_fence.load(std::memory_order_relaxed); }); } diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 11a648f38..91bada925 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -33,9 +33,6 @@ class RendererBase; namespace VideoCommon::GPUThread { -/// Command to signal to the GPU thread that processing has ended -struct EndProcessingCommand final {}; - /// Command to signal to the GPU thread that a command list is ready for processing struct SubmitListCommand final { explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {} @@ -83,7 +80,7 @@ struct OnCommandListEndCommand final {}; struct GPUTickCommand final {}; using CommandData = - std::variant; @@ -100,14 +97,12 @@ struct CommandDataContainer { /// Struct used to synchronize the GPU thread struct SynchState final { - std::atomic_bool is_running{true}; - - using CommandQueue = Common::SPSCQueue; + using CommandQueue = Common::SPSCQueue; std::mutex write_lock; CommandQueue queue; u64 last_fence{}; std::atomic signaled_fence{}; - std::condition_variable cv; + std::condition_variable_any cv; }; /// Class used to manage the GPU thread @@ -149,7 +144,7 @@ private: VideoCore::RasterizerInterface* rasterizer = nullptr; SynchState state; - std::thread thread; + std::jthread thread; }; } // namespace VideoCommon::GPUThread From 84f7e7e91c441636b93accae6f7bd52f70a8ab99 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Wed, 15 Sep 2021 20:10:25 -0400 Subject: [PATCH 3/3] vk_scheduler: Use std::jthread --- .../renderer_vulkan/vk_scheduler.cpp | 19 ++++++------------- src/video_core/renderer_vulkan/vk_scheduler.h | 7 +++---- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp index 1d438787a..0c11c814f 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.cpp +++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp @@ -43,17 +43,10 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_) command_pool{std::make_unique(*master_semaphore, device)} { AcquireNewChunk(); AllocateWorkerCommandBuffer(); - worker_thread = std::thread(&VKScheduler::WorkerThread, this); + worker_thread = std::jthread([this](std::stop_token token) { WorkerThread(token); }); } -VKScheduler::~VKScheduler() { - { - std::lock_guard lock{work_mutex}; - quit = true; - } - work_cv.notify_all(); - worker_thread.join(); -} +VKScheduler::~VKScheduler() = default; void VKScheduler::Flush(VkSemaphore signal_semaphore, VkSemaphore wait_semaphore) { SubmitExecution(signal_semaphore, wait_semaphore); @@ -135,7 +128,7 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) { return true; } -void VKScheduler::WorkerThread() { +void VKScheduler::WorkerThread(std::stop_token stop_token) { Common::SetCurrentThreadName("yuzu:VulkanWorker"); do { if (work_queue.empty()) { @@ -144,8 +137,8 @@ void VKScheduler::WorkerThread() { std::unique_ptr work; { std::unique_lock lock{work_mutex}; - work_cv.wait(lock, [this] { return !work_queue.empty() || quit; }); - if (quit) { + work_cv.wait(lock, stop_token, [this] { return !work_queue.empty(); }); + if (stop_token.stop_requested()) { continue; } work = std::move(work_queue.front()); @@ -158,7 +151,7 @@ void VKScheduler::WorkerThread() { } std::lock_guard reserve_lock{reserve_mutex}; chunk_reserve.push_back(std::move(work)); - } while (!quit); + } while (!stop_token.stop_requested()); } void VKScheduler::AllocateWorkerCommandBuffer() { diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h index 759ed5a48..bd22e4e83 100644 --- a/src/video_core/renderer_vulkan/vk_scheduler.h +++ b/src/video_core/renderer_vulkan/vk_scheduler.h @@ -187,7 +187,7 @@ private: GraphicsPipeline* graphics_pipeline = nullptr; }; - void WorkerThread(); + void WorkerThread(std::stop_token stop_token); void AllocateWorkerCommandBuffer(); @@ -212,7 +212,7 @@ private: vk::CommandBuffer current_cmdbuf; std::unique_ptr chunk; - std::thread worker_thread; + std::jthread worker_thread; State state; @@ -224,9 +224,8 @@ private: std::vector> chunk_reserve; std::mutex reserve_mutex; std::mutex work_mutex; - std::condition_variable work_cv; + std::condition_variable_any work_cv; std::condition_variable wait_cv; - std::atomic_bool quit{}; }; } // namespace Vulkan