video_core: Use a CV for blocking commands.
There is no need for a busy loop here. Let's just use a condition variable to save some power.
This commit is contained in:
parent
e6fb49fa4b
commit
e8bd9aed8b
|
@ -56,11 +56,17 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
||||||
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
} else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
|
||||||
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
||||||
return;
|
ASSERT(state.is_running == false);
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
state.signaled_fence.store(next.fence);
|
state.signaled_fence.store(next.fence);
|
||||||
|
if (next.block) {
|
||||||
|
// We have to lock the write_lock to ensure that the condition_variable wait does not hit a
|
||||||
|
// race between the check and the lock itself.
|
||||||
|
std::lock_guard lk(state.write_lock);
|
||||||
|
state.cv.notify_all();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||||
case Settings::GPUAccuracy::Extreme: {
|
case Settings::GPUAccuracy::Extreme: {
|
||||||
auto& gpu = system.GPU();
|
auto& gpu = system.GPU();
|
||||||
u64 fence = gpu.RequestFlush(addr, size);
|
u64 fence = gpu.RequestFlush(addr, size);
|
||||||
PushCommand(GPUTickCommand());
|
PushCommand(GPUTickCommand(), true);
|
||||||
while (fence > gpu.CurrentFlushRequestFence()) {
|
ASSERT(fence <= gpu.CurrentFlushRequestFence());
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
|
@ -124,18 +129,16 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
rasterizer->OnCPUWrite(addr, size);
|
rasterizer->OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::WaitIdle() const {
|
|
||||||
while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
|
|
||||||
state.is_running) {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void ThreadManager::ShutDown() {
|
void ThreadManager::ShutDown() {
|
||||||
if (!state.is_running) {
|
if (!state.is_running) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
std::lock_guard lk(state.write_lock);
|
||||||
state.is_running = false;
|
state.is_running = false;
|
||||||
|
state.cv.notify_all();
|
||||||
|
}
|
||||||
|
|
||||||
if (!thread.joinable()) {
|
if (!thread.joinable()) {
|
||||||
return;
|
return;
|
||||||
|
@ -150,15 +153,21 @@ void ThreadManager::OnCommandListEnd() {
|
||||||
PushCommand(OnCommandListEndCommand());
|
PushCommand(OnCommandListEndCommand());
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
|
||||||
std::unique_lock lk(state.write_lock);
|
|
||||||
const u64 fence{++state.last_fence};
|
|
||||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
|
||||||
|
|
||||||
if (!is_async) {
|
if (!is_async) {
|
||||||
// In synchronous GPU mode, block the caller until the command has executed
|
// In synchronous GPU mode, block the caller until the command has executed
|
||||||
lk.unlock();
|
block = true;
|
||||||
WaitIdle();
|
}
|
||||||
|
|
||||||
|
std::unique_lock lk(state.write_lock);
|
||||||
|
const u64 fence{++state.last_fence};
|
||||||
|
state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
|
||||||
|
|
||||||
|
if (block) {
|
||||||
|
state.cv.wait(lk, [this, fence] {
|
||||||
|
return fence <= state.signaled_fence.load(std::memory_order_relaxed) ||
|
||||||
|
!state.is_running;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return fence;
|
return fence;
|
||||||
|
|
|
@ -90,11 +90,12 @@ using CommandData =
|
||||||
struct CommandDataContainer {
|
struct CommandDataContainer {
|
||||||
CommandDataContainer() = default;
|
CommandDataContainer() = default;
|
||||||
|
|
||||||
explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
|
explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_)
|
||||||
: data{std::move(data_)}, fence{next_fence_} {}
|
: data{std::move(data_)}, fence{next_fence_}, block(block_) {}
|
||||||
|
|
||||||
CommandData data;
|
CommandData data;
|
||||||
u64 fence{};
|
u64 fence{};
|
||||||
|
bool block{};
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Struct used to synchronize the GPU thread
|
/// Struct used to synchronize the GPU thread
|
||||||
|
@ -106,6 +107,7 @@ struct SynchState final {
|
||||||
CommandQueue queue;
|
CommandQueue queue;
|
||||||
u64 last_fence{};
|
u64 last_fence{};
|
||||||
std::atomic<u64> signaled_fence{};
|
std::atomic<u64> signaled_fence{};
|
||||||
|
std::condition_variable cv;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// Class used to manage the GPU thread
|
/// Class used to manage the GPU thread
|
||||||
|
@ -140,10 +142,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Pushes a command to be executed by the GPU thread
|
/// Pushes a command to be executed by the GPU thread
|
||||||
u64 PushCommand(CommandData&& command_data);
|
u64 PushCommand(CommandData&& command_data, bool block = false);
|
||||||
|
|
||||||
// Wait until the gpu thread is idle.
|
|
||||||
void WaitIdle() const;
|
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
const bool is_async;
|
const bool is_async;
|
||||||
|
|
Reference in New Issue