VideoCore: Refactor fencing system.
This commit is contained in:
parent
4d60410dd9
commit
bc8b3d225e
|
@ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {}
|
|||
|
||||
void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width,
|
||||
u32 height, u32 stride, android::BufferTransformFlags transform,
|
||||
const Common::Rectangle<int>& crop_rect) {
|
||||
const Common::Rectangle<int>& crop_rect,
|
||||
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) {
|
||||
const VAddr addr = nvmap.GetHandleAddress(buffer_handle);
|
||||
LOG_TRACE(Service,
|
||||
"Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}",
|
||||
|
@ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form
|
|||
stride, format, transform, crop_rect};
|
||||
|
||||
system.GetPerfStats().EndSystemFrame();
|
||||
system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0);
|
||||
system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences);
|
||||
system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs());
|
||||
system.GetPerfStats().BeginSystemFrame();
|
||||
}
|
||||
|
|
|
@ -38,7 +38,8 @@ public:
|
|||
/// Performs a screen flip, drawing the buffer pointed to by the handle.
|
||||
void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height,
|
||||
u32 stride, android::BufferTransformFlags transform,
|
||||
const Common::Rectangle<int>& crop_rect);
|
||||
const Common::Rectangle<int>& crop_rect,
|
||||
std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences);
|
||||
|
||||
Kernel::KEvent* QueryEvent(u32 event_id) override;
|
||||
|
||||
|
|
|
@ -269,17 +269,6 @@ void NVFlinger::Compose() {
|
|||
return; // We are likely shutting down
|
||||
}
|
||||
|
||||
auto& syncpoint_manager = system.Host1x().GetSyncpointManager();
|
||||
const auto& multi_fence = buffer.fence;
|
||||
guard->unlock();
|
||||
for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) {
|
||||
const auto& fence = multi_fence.fences[fence_id];
|
||||
syncpoint_manager.WaitGuest(fence.id, fence.value);
|
||||
}
|
||||
guard->lock();
|
||||
|
||||
MicroProfileFlip();
|
||||
|
||||
// Now send the buffer to the GPU for drawing.
|
||||
// TODO(Subv): Support more than just disp0. The display device selection is probably based
|
||||
// on which display we're drawing (Default, Internal, External, etc)
|
||||
|
@ -293,8 +282,10 @@ void NVFlinger::Compose() {
|
|||
|
||||
nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(),
|
||||
igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(),
|
||||
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect);
|
||||
static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect,
|
||||
buffer.fence.fences, buffer.fence.num_fences);
|
||||
|
||||
MicroProfileFlip();
|
||||
guard->lock();
|
||||
|
||||
swap_interval = buffer.swap_interval;
|
||||
|
|
|
@ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
|
|||
const bool is_accuracy_normal =
|
||||
Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal;
|
||||
|
||||
auto it = committed_ranges.begin();
|
||||
while (it != committed_ranges.end()) {
|
||||
auto& current_intervals = *it;
|
||||
auto next_it = std::next(it);
|
||||
while (next_it != committed_ranges.end()) {
|
||||
for (auto& interval : *next_it) {
|
||||
current_intervals.subtract(interval);
|
||||
}
|
||||
next_it++;
|
||||
}
|
||||
it++;
|
||||
}
|
||||
|
||||
boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads;
|
||||
u64 total_size_bytes = 0;
|
||||
u64 largest_copy = 0;
|
||||
|
|
|
@ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
|
|||
void DmaPusher::DispatchCalls() {
|
||||
MICROPROFILE_SCOPE(DispatchCalls);
|
||||
|
||||
gpu.SyncGuestHost();
|
||||
|
||||
dma_pushbuffer_subindex = 0;
|
||||
|
||||
dma_state.is_last_call = true;
|
||||
|
@ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() {
|
|||
}
|
||||
}
|
||||
gpu.FlushCommands();
|
||||
gpu.SyncGuestHost();
|
||||
gpu.OnCommandListEnd();
|
||||
}
|
||||
|
||||
|
|
|
@ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume
|
|||
return;
|
||||
case MAXWELL3D_REG_INDEX(fragment_barrier):
|
||||
return rasterizer->FragmentBarrier();
|
||||
case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache):
|
||||
rasterizer->InvalidateGPUCache();
|
||||
return rasterizer->WaitForIdle();
|
||||
case MAXWELL3D_REG_INDEX(tiled_cache_barrier):
|
||||
return rasterizer->TiledCacheBarrier();
|
||||
}
|
||||
|
@ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() {
|
|||
|
||||
switch (regs.query.query_get.operation) {
|
||||
case Regs::QueryOperation::Release:
|
||||
if (regs.query.query_get.fence == 1) {
|
||||
rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
|
||||
if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) {
|
||||
const GPUVAddr sequence_address{regs.query.QueryAddress()};
|
||||
const u32 payload = regs.query.query_sequence;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
memory_manager.Write<u32>(sequence_address, payload);
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
} else {
|
||||
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
||||
struct LongQueryResult {
|
||||
u64_le value;
|
||||
u64_le timestamp;
|
||||
};
|
||||
const GPUVAddr sequence_address{regs.query.QueryAddress()};
|
||||
const u32 payload = regs.query.query_sequence;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
LongQueryResult query_result{payload, system.GPU().GetTicks()};
|
||||
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
}
|
||||
break;
|
||||
case Regs::QueryOperation::Acquire:
|
||||
|
|
|
@ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
|||
u64 timestamp;
|
||||
};
|
||||
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_sequence;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
Block block{};
|
||||
block.sequence = regs.semaphore_sequence;
|
||||
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
|
||||
// CoreTiming
|
||||
block.sequence = payload;
|
||||
block.timestamp = gpu.GetTicks();
|
||||
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
|
||||
memory_manager.WriteBlock(sequence_address, &block, sizeof(block));
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
} else {
|
||||
do {
|
||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||
|
@ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
|||
regs.acquire_active = true;
|
||||
regs.acquire_mode = false;
|
||||
if (word != regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
|
@ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
|||
regs.acquire_active = true;
|
||||
regs.acquire_mode = true;
|
||||
if (word < regs.acquire_value) {
|
||||
rasterizer->ReleaseFences();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||
if (word & regs.semaphore_sequence == 0) {
|
||||
if (word && regs.semaphore_sequence == 0) {
|
||||
rasterizer->ReleaseFences();
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
continue;
|
||||
}
|
||||
|
@ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() {
|
|||
}
|
||||
|
||||
void Puller::ProcessSemaphoreRelease() {
|
||||
rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
|
||||
const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()};
|
||||
const u32 payload = regs.semaphore_release;
|
||||
std::function<void()> operation([this, sequence_address, payload] {
|
||||
memory_manager.Write<u32>(sequence_address, payload);
|
||||
});
|
||||
rasterizer->SignalFence(std::move(operation));
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreAcquire() {
|
||||
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
const auto value = regs.semaphore_acquire;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(5));
|
||||
if (word != value) {
|
||||
while (word != value) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_value = value;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
rasterizer->ReleaseFences();
|
||||
word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
// TODO(kemathe73) figure out how to do the acquire_timeout
|
||||
regs.acquire_mode = false;
|
||||
regs.acquire_source = false;
|
||||
|
@ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
|
|||
case BufferMethods::SemaphoreAddressHigh:
|
||||
case BufferMethods::SemaphoreAddressLow:
|
||||
case BufferMethods::SemaphoreSequencePayload:
|
||||
case BufferMethods::WrcacheFlush:
|
||||
case BufferMethods::SyncpointPayload:
|
||||
break;
|
||||
case BufferMethods::WrcacheFlush:
|
||||
case BufferMethods::RefCnt:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
|
@ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) {
|
|||
}
|
||||
case BufferMethods::MemOpB: {
|
||||
// Implement this better.
|
||||
rasterizer->SyncGuestHost();
|
||||
rasterizer->InvalidateGPUCache();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::MemOpC:
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
|
@ -19,28 +21,7 @@ namespace VideoCommon {
|
|||
|
||||
class FenceBase {
|
||||
public:
|
||||
explicit FenceBase(u32 payload_, bool is_stubbed_)
|
||||
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
|
||||
|
||||
explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
|
||||
|
||||
u8* GetAddress() const {
|
||||
return address;
|
||||
}
|
||||
|
||||
u32 GetPayload() const {
|
||||
return payload;
|
||||
}
|
||||
|
||||
bool IsSemaphore() const {
|
||||
return is_semaphore;
|
||||
}
|
||||
|
||||
private:
|
||||
u8* address;
|
||||
u32 payload;
|
||||
bool is_semaphore;
|
||||
explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {}
|
||||
|
||||
protected:
|
||||
bool is_stubbed;
|
||||
|
@ -60,31 +41,28 @@ public:
|
|||
buffer_cache.AccumulateFlushes();
|
||||
}
|
||||
|
||||
void SignalSemaphore(u8* addr, u32 value) {
|
||||
void SyncOperation(std::function<void()>&& func) {
|
||||
uncommitted_operations.emplace_back(std::move(func));
|
||||
}
|
||||
|
||||
void SignalFence(std::function<void()>&& func) {
|
||||
TryReleasePendingFences();
|
||||
const bool should_flush = ShouldFlush();
|
||||
CommitAsyncFlushes();
|
||||
TFence new_fence = CreateFence(addr, value, !should_flush);
|
||||
uncommitted_operations.emplace_back(std::move(func));
|
||||
CommitOperations();
|
||||
TFence new_fence = CreateFence(!should_flush);
|
||||
fences.push(new_fence);
|
||||
QueueFence(new_fence);
|
||||
if (should_flush) {
|
||||
rasterizer.FlushCommands();
|
||||
}
|
||||
rasterizer.SyncGuestHost();
|
||||
}
|
||||
|
||||
void SignalSyncPoint(u32 value) {
|
||||
syncpoint_manager.IncrementGuest(value);
|
||||
TryReleasePendingFences();
|
||||
const bool should_flush = ShouldFlush();
|
||||
CommitAsyncFlushes();
|
||||
TFence new_fence = CreateFence(value, !should_flush);
|
||||
fences.push(new_fence);
|
||||
QueueFence(new_fence);
|
||||
if (should_flush) {
|
||||
rasterizer.FlushCommands();
|
||||
}
|
||||
rasterizer.SyncGuestHost();
|
||||
std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); });
|
||||
SignalFence(std::move(func));
|
||||
}
|
||||
|
||||
void WaitPendingFences() {
|
||||
|
@ -94,12 +72,10 @@ public:
|
|||
WaitFence(current_fence);
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
if (current_fence->IsSemaphore()) {
|
||||
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
|
||||
auto payload = current_fence->GetPayload();
|
||||
std::memcpy(address, &payload, sizeof(payload));
|
||||
} else {
|
||||
syncpoint_manager.IncrementHost(current_fence->GetPayload());
|
||||
auto operations = std::move(pending_operations.front());
|
||||
pending_operations.pop_front();
|
||||
for (auto& operation : operations) {
|
||||
operation();
|
||||
}
|
||||
PopFence();
|
||||
}
|
||||
|
@ -114,11 +90,9 @@ protected:
|
|||
|
||||
virtual ~FenceManager() = default;
|
||||
|
||||
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
|
||||
/// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
|
||||
/// true
|
||||
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
|
||||
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
|
||||
virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
|
||||
virtual TFence CreateFence(bool is_stubbed) = 0;
|
||||
/// Queues a fence into the backend if the fence isn't stubbed.
|
||||
virtual void QueueFence(TFence& fence) = 0;
|
||||
/// Notifies that the backend fence has been signaled/reached in host GPU.
|
||||
|
@ -141,12 +115,10 @@ private:
|
|||
return;
|
||||
}
|
||||
PopAsyncFlushes();
|
||||
if (current_fence->IsSemaphore()) {
|
||||
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
|
||||
const auto payload = current_fence->GetPayload();
|
||||
std::memcpy(address, &payload, sizeof(payload));
|
||||
} else {
|
||||
syncpoint_manager.IncrementHost(current_fence->GetPayload());
|
||||
auto operations = std::move(pending_operations.front());
|
||||
pending_operations.pop_front();
|
||||
for (auto& operation : operations) {
|
||||
operation();
|
||||
}
|
||||
PopFence();
|
||||
}
|
||||
|
@ -165,16 +137,20 @@ private:
|
|||
}
|
||||
|
||||
void PopAsyncFlushes() {
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.PopAsyncFlushes();
|
||||
buffer_cache.PopAsyncFlushes();
|
||||
}
|
||||
query_cache.PopAsyncFlushes();
|
||||
}
|
||||
|
||||
void CommitAsyncFlushes() {
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.CommitAsyncFlushes();
|
||||
buffer_cache.CommitAsyncFlushes();
|
||||
}
|
||||
query_cache.CommitAsyncFlushes();
|
||||
}
|
||||
|
||||
|
@ -183,7 +159,13 @@ private:
|
|||
fences.pop();
|
||||
}
|
||||
|
||||
void CommitOperations() {
|
||||
pending_operations.emplace_back(std::move(uncommitted_operations));
|
||||
}
|
||||
|
||||
std::queue<TFence> fences;
|
||||
std::deque<std::function<void()>> uncommitted_operations;
|
||||
std::deque<std::deque<std::function<void()>>> pending_operations;
|
||||
|
||||
DelayedDestructionRing<TFence, 6> delayed_destruction_ring;
|
||||
};
|
||||
|
|
|
@ -93,17 +93,14 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
/// Synchronizes CPU writes with Host GPU memory.
|
||||
void SyncGuestHost() {
|
||||
rasterizer->SyncGuestHost();
|
||||
void InvalidateGPUCache() {
|
||||
rasterizer->InvalidateGPUCache();
|
||||
}
|
||||
|
||||
/// Signal the ending of command list.
|
||||
void OnCommandListEnd() {
|
||||
if (is_async) {
|
||||
// This command only applies to asynchronous GPU mode
|
||||
gpu_thread.OnCommandListEnd();
|
||||
}
|
||||
}
|
||||
|
||||
/// Request a host GPU memory flush from the CPU.
|
||||
template <typename Func>
|
||||
|
@ -296,7 +293,7 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||
Service::Nvidia::NvFence* fences, size_t num_fences) {
|
||||
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
|
||||
size_t current_request_counter{};
|
||||
{
|
||||
std::unique_lock<std::mutex> lk(request_swap_mutex);
|
||||
|
@ -412,8 +409,8 @@ void GPU::FlushCommands() {
|
|||
impl->FlushCommands();
|
||||
}
|
||||
|
||||
void GPU::SyncGuestHost() {
|
||||
impl->SyncGuestHost();
|
||||
void GPU::InvalidateGPUCache() {
|
||||
impl->InvalidateGPUCache();
|
||||
}
|
||||
|
||||
void GPU::OnCommandListEnd() {
|
||||
|
@ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const {
|
|||
}
|
||||
|
||||
void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||
Service::Nvidia::NvFence* fences, size_t num_fences) {
|
||||
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) {
|
||||
impl->RequestSwapBuffers(framebuffer, fences, num_fences);
|
||||
}
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ public:
|
|||
/// Flush all current written commands into the host GPU for execution.
|
||||
void FlushCommands();
|
||||
/// Synchronizes CPU writes with Host GPU memory.
|
||||
void SyncGuestHost();
|
||||
void InvalidateGPUCache();
|
||||
/// Signal the ending of command list.
|
||||
void OnCommandListEnd();
|
||||
|
||||
|
@ -180,7 +180,7 @@ public:
|
|||
void RendererFrameEndNotify();
|
||||
|
||||
void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer,
|
||||
Service::Nvidia::NvFence* fences, size_t num_fences);
|
||||
std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences);
|
||||
|
||||
/// Performs any additional setup necessary in order to begin GPU emulation.
|
||||
/// This can be used to launch any necessary threads and register any necessary
|
||||
|
|
|
@ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
|||
}
|
||||
|
||||
void ThreadManager::TickGPU() {
|
||||
PushCommand(GPUTickCommand(), true);
|
||||
PushCommand(GPUTickCommand());
|
||||
}
|
||||
|
||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||
|
|
|
@ -62,7 +62,10 @@ public:
|
|||
virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0;
|
||||
|
||||
/// Signal a GPU based semaphore as a fence
|
||||
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
|
||||
virtual void SignalFence(std::function<void()>&& func) = 0;
|
||||
|
||||
/// Send an operation to be done after a certain amount of flushes.
|
||||
virtual void SyncOperation(std::function<void()>&& func) = 0;
|
||||
|
||||
/// Signal a GPU based syncpoint as a fence
|
||||
virtual void SignalSyncPoint(u32 value) = 0;
|
||||
|
@ -89,7 +92,7 @@ public:
|
|||
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||
|
||||
/// Sync memory between guest and host.
|
||||
virtual void SyncGuestHost() = 0;
|
||||
virtual void InvalidateGPUCache() = 0;
|
||||
|
||||
/// Unmap memory range
|
||||
virtual void UnmapMemory(VAddr addr, u64 size) = 0;
|
||||
|
|
|
@ -10,10 +10,7 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
|
||||
|
||||
GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{address_, payload_, is_stubbed_} {}
|
||||
GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {}
|
||||
|
||||
GLInnerFence::~GLInnerFence() = default;
|
||||
|
||||
|
@ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize
|
|||
BufferCache& buffer_cache_, QueryCache& query_cache_)
|
||||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {}
|
||||
|
||||
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
|
||||
return std::make_shared<GLInnerFence>(value, is_stubbed);
|
||||
}
|
||||
|
||||
Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
|
||||
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
|
||||
Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) {
|
||||
return std::make_shared<GLInnerFence>(is_stubbed);
|
||||
}
|
||||
|
||||
void FenceManagerOpenGL::QueueFence(Fence& fence) {
|
||||
|
|
|
@ -16,8 +16,7 @@ namespace OpenGL {
|
|||
|
||||
class GLInnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
|
||||
explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
|
||||
explicit GLInnerFence(bool is_stubbed_);
|
||||
~GLInnerFence();
|
||||
|
||||
void Queue();
|
||||
|
@ -40,8 +39,7 @@ public:
|
|||
QueryCache& query_cache);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
|
|
@ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGuestHost() {
|
||||
void RasterizerOpenGL::InvalidateGPUCache() {
|
||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||
shader_cache.SyncGuestHost();
|
||||
{
|
||||
|
@ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
if (!gpu.IsAsync()) {
|
||||
gpu_memory->Write<u32>(addr, value);
|
||||
return;
|
||||
void RasterizerOpenGL::SignalFence(std::function<void()>&& func) {
|
||||
fence_manager.SignalFence(std::move(func));
|
||||
}
|
||||
auto paddr = gpu_memory->GetPointer(addr);
|
||||
fence_manager.SignalSemaphore(paddr, value);
|
||||
|
||||
void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) {
|
||||
fence_manager.SyncOperation(std::move(func));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
||||
|
@ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::SignalReference() {
|
||||
if (!gpu.IsAsync()) {
|
||||
return;
|
||||
}
|
||||
fence_manager.SignalOrdering();
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::ReleaseFences() {
|
||||
if (!gpu.IsAsync()) {
|
||||
return;
|
||||
}
|
||||
fence_manager.WaitPendingFences();
|
||||
}
|
||||
|
||||
|
@ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() {
|
|||
}
|
||||
|
||||
void RasterizerOpenGL::FragmentBarrier() {
|
||||
glTextureBarrier();
|
||||
glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT);
|
||||
}
|
||||
|
||||
|
|
|
@ -80,10 +80,11 @@ public:
|
|||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalFence(std::function<void()>&& func) override;
|
||||
void SyncOperation(std::function<void()>&& func) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
|
|
|
@ -11,11 +11,8 @@
|
|||
|
||||
namespace Vulkan {
|
||||
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_)
|
||||
: FenceBase{is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::~InnerFence() = default;
|
||||
|
||||
|
@ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G
|
|||
: GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_},
|
||||
scheduler{scheduler_} {}
|
||||
|
||||
Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
|
||||
}
|
||||
|
||||
Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
|
||||
Fence FenceManager::CreateFence(bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, is_stubbed);
|
||||
}
|
||||
|
||||
void FenceManager::QueueFence(Fence& fence) {
|
||||
|
|
|
@ -25,8 +25,7 @@ class Scheduler;
|
|||
|
||||
class InnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
|
||||
explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
|
||||
explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_);
|
||||
~InnerFence();
|
||||
|
||||
void Queue();
|
||||
|
@ -50,8 +49,7 @@ public:
|
|||
QueryCache& query_cache, const Device& device, Scheduler& scheduler);
|
||||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
|
|
@ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SyncGuestHost() {
|
||||
void RasterizerVulkan::InvalidateGPUCache() {
|
||||
pipeline_cache.SyncGuestHost();
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
|
@ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
|
|||
}
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
if (!gpu.IsAsync()) {
|
||||
gpu_memory->Write<u32>(addr, value);
|
||||
return;
|
||||
void RasterizerVulkan::SignalFence(std::function<void()>&& func) {
|
||||
fence_manager.SignalFence(std::move(func));
|
||||
}
|
||||
auto paddr = gpu_memory->GetPointer(addr);
|
||||
fence_manager.SignalSemaphore(paddr, value);
|
||||
|
||||
void RasterizerVulkan::SyncOperation(std::function<void()>&& func) {
|
||||
fence_manager.SyncOperation(std::move(func));
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SignalSyncPoint(u32 value) {
|
||||
|
@ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::SignalReference() {
|
||||
if (!gpu.IsAsync()) {
|
||||
return;
|
||||
}
|
||||
fence_manager.SignalOrdering();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::ReleaseFences() {
|
||||
if (!gpu.IsAsync()) {
|
||||
return;
|
||||
}
|
||||
fence_manager.WaitPendingFences();
|
||||
}
|
||||
|
||||
|
|
|
@ -76,10 +76,11 @@ public:
|
|||
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||
void SyncGuestHost() override;
|
||||
void InvalidateGPUCache() override;
|
||||
void UnmapMemory(VAddr addr, u64 size) override;
|
||||
void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||
void SignalFence(std::function<void()>&& func) override;
|
||||
void SyncOperation(std::function<void()>&& func) override;
|
||||
void SignalSyncPoint(u32 value) override;
|
||||
void SignalReference() override;
|
||||
void ReleaseFences() override;
|
||||
|
|
Reference in New Issue