Merge pull request #3677 from FernandoS27/better-sync
Introduce Predictive Flushing and Improve ASYNC GPU
This commit is contained in:
commit
bf2ddb8fd5
|
@ -92,7 +92,7 @@ void LogSettings() {
|
||||||
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
LogSetting("Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
||||||
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
|
LogSetting("Renderer_FrameLimit", Settings::values.frame_limit);
|
||||||
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
LogSetting("Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
||||||
LogSetting("Renderer_UseAccurateGpuEmulation", Settings::values.use_accurate_gpu_emulation);
|
LogSetting("Renderer_GPUAccuracyLevel", Settings::values.gpu_accuracy);
|
||||||
LogSetting("Renderer_UseAsynchronousGpuEmulation",
|
LogSetting("Renderer_UseAsynchronousGpuEmulation",
|
||||||
Settings::values.use_asynchronous_gpu_emulation);
|
Settings::values.use_asynchronous_gpu_emulation);
|
||||||
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
|
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
|
||||||
|
@ -109,4 +109,12 @@ void LogSettings() {
|
||||||
LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
|
LogSetting("Services_BCATBoxcatLocal", Settings::values.bcat_boxcat_local);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsGPULevelExtreme() {
|
||||||
|
return values.gpu_accuracy == GPUAccuracy::Extreme;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsGPULevelHigh() {
|
||||||
|
return values.gpu_accuracy == GPUAccuracy::Extreme || values.gpu_accuracy == GPUAccuracy::High;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace Settings
|
} // namespace Settings
|
||||||
|
|
|
@ -376,6 +376,12 @@ enum class RendererBackend {
|
||||||
Vulkan = 1,
|
Vulkan = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// GPU emulation accuracy level stored in Settings::values.gpu_accuracy.
/// Settings::IsGPULevelHigh() is true for High and Extreme, while
/// Settings::IsGPULevelExtreme() is true only for Extreme.
enum class GPUAccuracy : u32 {
|
||||||
|
Normal = 0,
|
||||||
|
High = 1,
|
||||||
|
Extreme = 2,
|
||||||
|
};
|
||||||
|
|
||||||
struct Values {
|
struct Values {
|
||||||
// System
|
// System
|
||||||
bool use_docked_mode;
|
bool use_docked_mode;
|
||||||
|
@ -436,7 +442,7 @@ struct Values {
|
||||||
bool use_frame_limit;
|
bool use_frame_limit;
|
||||||
u16 frame_limit;
|
u16 frame_limit;
|
||||||
bool use_disk_shader_cache;
|
bool use_disk_shader_cache;
|
||||||
bool use_accurate_gpu_emulation;
|
GPUAccuracy gpu_accuracy;
|
||||||
bool use_asynchronous_gpu_emulation;
|
bool use_asynchronous_gpu_emulation;
|
||||||
bool use_vsync;
|
bool use_vsync;
|
||||||
bool force_30fps_mode;
|
bool force_30fps_mode;
|
||||||
|
@ -480,6 +486,9 @@ struct Values {
|
||||||
std::map<u64, std::vector<std::string>> disabled_addons;
|
std::map<u64, std::vector<std::string>> disabled_addons;
|
||||||
} extern values;
|
} extern values;
|
||||||
|
|
||||||
|
bool IsGPULevelExtreme();
|
||||||
|
bool IsGPULevelHigh();
|
||||||
|
|
||||||
void Apply();
|
void Apply();
|
||||||
void LogSettings();
|
void LogSettings();
|
||||||
} // namespace Settings
|
} // namespace Settings
|
||||||
|
|
|
@ -56,6 +56,18 @@ static const char* TranslateRenderer(Settings::RendererBackend backend) {
|
||||||
return "Unknown";
|
return "Unknown";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Converts a GPU accuracy level into the name reported in the telemetry field
/// "Renderer_GPUAccuracyLevel".
/// @param accuracy Accuracy level to translate.
/// @return "Normal", "High" or "Extreme"; "Unknown" for any value that is not a declared
///         enumerator.
static const char* TranslateGPUAccuracyLevel(Settings::GPUAccuracy accuracy) {
    switch (accuracy) {
    case Settings::GPUAccuracy::Normal:
        return "Normal";
    case Settings::GPUAccuracy::High:
        return "High";
    case Settings::GPUAccuracy::Extreme:
        return "Extreme";
    }
    // Only reachable when the stored value does not match any enumerator.
    return "Unknown";
}
|
||||||
|
|
||||||
u64 GetTelemetryId() {
|
u64 GetTelemetryId() {
|
||||||
u64 telemetry_id{};
|
u64 telemetry_id{};
|
||||||
const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
|
const std::string filename{FileUtil::GetUserPath(FileUtil::UserPath::ConfigDir) +
|
||||||
|
@ -184,8 +196,8 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
|
||||||
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
AddField(field_type, "Renderer_UseFrameLimit", Settings::values.use_frame_limit);
|
||||||
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
|
AddField(field_type, "Renderer_FrameLimit", Settings::values.frame_limit);
|
||||||
AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
AddField(field_type, "Renderer_UseDiskShaderCache", Settings::values.use_disk_shader_cache);
|
||||||
AddField(field_type, "Renderer_UseAccurateGpuEmulation",
|
AddField(field_type, "Renderer_GPUAccuracyLevel",
|
||||||
Settings::values.use_accurate_gpu_emulation);
|
TranslateGPUAccuracyLevel(Settings::values.gpu_accuracy));
|
||||||
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
||||||
Settings::values.use_asynchronous_gpu_emulation);
|
Settings::values.use_asynchronous_gpu_emulation);
|
||||||
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
|
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
|
||||||
|
|
|
@ -23,6 +23,7 @@ add_library(video_core STATIC
|
||||||
engines/shader_bytecode.h
|
engines/shader_bytecode.h
|
||||||
engines/shader_header.h
|
engines/shader_header.h
|
||||||
engines/shader_type.h
|
engines/shader_type.h
|
||||||
|
fence_manager.h
|
||||||
gpu.cpp
|
gpu.cpp
|
||||||
gpu.h
|
gpu.h
|
||||||
gpu_asynch.cpp
|
gpu_asynch.cpp
|
||||||
|
@ -51,6 +52,8 @@ add_library(video_core STATIC
|
||||||
renderer_opengl/gl_buffer_cache.h
|
renderer_opengl/gl_buffer_cache.h
|
||||||
renderer_opengl/gl_device.cpp
|
renderer_opengl/gl_device.cpp
|
||||||
renderer_opengl/gl_device.h
|
renderer_opengl/gl_device.h
|
||||||
|
renderer_opengl/gl_fence_manager.cpp
|
||||||
|
renderer_opengl/gl_fence_manager.h
|
||||||
renderer_opengl/gl_framebuffer_cache.cpp
|
renderer_opengl/gl_framebuffer_cache.cpp
|
||||||
renderer_opengl/gl_framebuffer_cache.h
|
renderer_opengl/gl_framebuffer_cache.h
|
||||||
renderer_opengl/gl_rasterizer.cpp
|
renderer_opengl/gl_rasterizer.cpp
|
||||||
|
@ -176,6 +179,8 @@ if (ENABLE_VULKAN)
|
||||||
renderer_vulkan/vk_descriptor_pool.h
|
renderer_vulkan/vk_descriptor_pool.h
|
||||||
renderer_vulkan/vk_device.cpp
|
renderer_vulkan/vk_device.cpp
|
||||||
renderer_vulkan/vk_device.h
|
renderer_vulkan/vk_device.h
|
||||||
|
renderer_vulkan/vk_fence_manager.cpp
|
||||||
|
renderer_vulkan/vk_fence_manager.h
|
||||||
renderer_vulkan/vk_graphics_pipeline.cpp
|
renderer_vulkan/vk_graphics_pipeline.cpp
|
||||||
renderer_vulkan/vk_graphics_pipeline.h
|
renderer_vulkan/vk_graphics_pipeline.h
|
||||||
renderer_vulkan/vk_image.cpp
|
renderer_vulkan/vk_image.cpp
|
||||||
|
|
|
@ -5,6 +5,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <list>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
@ -18,8 +19,10 @@
|
||||||
|
|
||||||
#include "common/alignment.h"
|
#include "common/alignment.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/buffer_cache/buffer_block.h"
|
#include "video_core/buffer_cache/buffer_block.h"
|
||||||
#include "video_core/buffer_cache/map_interval.h"
|
#include "video_core/buffer_cache/map_interval.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
@ -79,6 +82,9 @@ public:
|
||||||
auto map = MapAddress(block, gpu_addr, cpu_addr, size);
|
auto map = MapAddress(block, gpu_addr, cpu_addr, size);
|
||||||
if (is_written) {
|
if (is_written) {
|
||||||
map->MarkAsModified(true, GetModifiedTicks());
|
map->MarkAsModified(true, GetModifiedTicks());
|
||||||
|
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
|
||||||
|
MarkForAsyncFlush(map);
|
||||||
|
}
|
||||||
if (!map->IsWritten()) {
|
if (!map->IsWritten()) {
|
||||||
map->MarkAsWritten(true);
|
map->MarkAsWritten(true);
|
||||||
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
|
MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
|
||||||
|
@ -137,11 +143,22 @@ public:
|
||||||
});
|
});
|
||||||
for (auto& object : objects) {
|
for (auto& object : objects) {
|
||||||
if (object->IsModified() && object->IsRegistered()) {
|
if (object->IsModified() && object->IsRegistered()) {
|
||||||
|
mutex.unlock();
|
||||||
FlushMap(object);
|
FlushMap(object);
|
||||||
|
mutex.lock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks whether any cached map overlapping [addr, addr + size) is both modified and
/// registered, i.e. whether the region holds data that still has to be flushed.
bool MustFlushRegion(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const MapInterval& map : GetMapsInRange(addr, size)) {
        if (map->IsModified() && map->IsRegistered()) {
            return true;
        }
    }
    return false;
}
|
||||||
|
|
||||||
/// Mark the specified region as being invalidated
|
/// Mark the specified region as being invalidated
|
||||||
void InvalidateRegion(VAddr addr, u64 size) {
|
void InvalidateRegion(VAddr addr, u64 size) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
@ -154,6 +171,77 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Handles a CPU write to [addr, addr + size): every overlapping map that is still memory
/// marked and registered gets its memory unmarked, is flagged as sync pending and is queued in
/// marked_for_unregister so that SyncGuestHost() can unregister it later.
void OnCPUWrite(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const auto& map : GetMapsInRange(addr, size)) {
        if (!map->IsMemoryMarked() || !map->IsRegistered()) {
            continue;
        }
        UnmarkMemory(map);
        map->SetSyncPending(true);
        marked_for_unregister.emplace_back(map);
    }
}
|
||||||
|
|
||||||
|
void SyncGuestHost() {
|
||||||
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
|
for (const auto& object : marked_for_unregister) {
|
||||||
|
if (object->IsRegistered()) {
|
||||||
|
object->SetSyncPending(false);
|
||||||
|
Unregister(object);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
marked_for_unregister.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommitAsyncFlushes() {
|
||||||
|
if (uncommitted_flushes) {
|
||||||
|
auto commit_list = std::make_shared<std::list<MapInterval>>();
|
||||||
|
for (auto& map : *uncommitted_flushes) {
|
||||||
|
if (map->IsRegistered() && map->IsModified()) {
|
||||||
|
// TODO(Blinkhawk): Implement backend asynchronous flushing
|
||||||
|
// AsyncFlushMap(map)
|
||||||
|
commit_list->push_back(map);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!commit_list->empty()) {
|
||||||
|
committed_flushes.push_back(commit_list);
|
||||||
|
} else {
|
||||||
|
committed_flushes.emplace_back();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
committed_flushes.emplace_back();
|
||||||
|
}
|
||||||
|
uncommitted_flushes.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ShouldWaitAsyncFlushes() const {
|
||||||
|
return !committed_flushes.empty() && committed_flushes.front() != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasUncommittedFlushes() const {
|
||||||
|
return uncommitted_flushes != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PopAsyncFlushes() {
|
||||||
|
if (committed_flushes.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto& flush_list = committed_flushes.front();
|
||||||
|
if (!flush_list) {
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (MapInterval& map : *flush_list) {
|
||||||
|
if (map->IsRegistered()) {
|
||||||
|
// TODO(Blinkhawk): Replace this for reading the asynchronous flush
|
||||||
|
FlushMap(map);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
|
virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -196,17 +284,30 @@ protected:
|
||||||
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
|
const IntervalType interval{new_map->GetStart(), new_map->GetEnd()};
|
||||||
mapped_addresses.insert({interval, new_map});
|
mapped_addresses.insert({interval, new_map});
|
||||||
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
|
rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
|
||||||
|
new_map->SetMemoryMarked(true);
|
||||||
if (inherit_written) {
|
if (inherit_written) {
|
||||||
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
|
MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1);
|
||||||
new_map->MarkAsWritten(true);
|
new_map->MarkAsWritten(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unregisters an object from the cache
|
void UnmarkMemory(const MapInterval& map) {
|
||||||
void Unregister(MapInterval& map) {
|
if (!map->IsMemoryMarked()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
const std::size_t size = map->GetEnd() - map->GetStart();
|
const std::size_t size = map->GetEnd() - map->GetStart();
|
||||||
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
|
rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1);
|
||||||
|
map->SetMemoryMarked(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Unregisters an object from the cache
|
||||||
|
void Unregister(const MapInterval& map) {
|
||||||
|
UnmarkMemory(map);
|
||||||
map->MarkAsRegistered(false);
|
map->MarkAsRegistered(false);
|
||||||
|
if (map->IsSyncPending()) {
|
||||||
|
marked_for_unregister.remove(map);
|
||||||
|
map->SetSyncPending(false);
|
||||||
|
}
|
||||||
if (map->IsWritten()) {
|
if (map->IsWritten()) {
|
||||||
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
|
UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1);
|
||||||
}
|
}
|
||||||
|
@ -264,6 +365,9 @@ private:
|
||||||
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
|
MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr);
|
||||||
if (modified_inheritance) {
|
if (modified_inheritance) {
|
||||||
new_map->MarkAsModified(true, GetModifiedTicks());
|
new_map->MarkAsModified(true, GetModifiedTicks());
|
||||||
|
if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) {
|
||||||
|
MarkForAsyncFlush(new_map);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
Register(new_map, write_inheritance);
|
Register(new_map, write_inheritance);
|
||||||
return new_map;
|
return new_map;
|
||||||
|
@ -450,6 +554,13 @@ private:
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Adds `map` to the set of maps awaiting the next CommitAsyncFlushes(), creating the set on
/// first use.
void MarkForAsyncFlush(MapInterval& map) {
    if (uncommitted_flushes == nullptr) {
        uncommitted_flushes = std::make_shared<std::unordered_set<MapInterval>>();
    }
    uncommitted_flushes->insert(map);
}
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
||||||
|
@ -479,6 +590,10 @@ private:
|
||||||
u64 modified_ticks = 0;
|
u64 modified_ticks = 0;
|
||||||
|
|
||||||
std::vector<u8> staging_buffer;
|
std::vector<u8> staging_buffer;
|
||||||
|
std::list<MapInterval> marked_for_unregister;
|
||||||
|
|
||||||
|
std::shared_ptr<std::unordered_set<MapInterval>> uncommitted_flushes{};
|
||||||
|
std::list<std::shared_ptr<std::list<MapInterval>>> committed_flushes;
|
||||||
|
|
||||||
std::recursive_mutex mutex;
|
std::recursive_mutex mutex;
|
||||||
};
|
};
|
||||||
|
|
|
@ -46,6 +46,22 @@ public:
|
||||||
return is_registered;
|
return is_registered;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores the "memory marked" flag for this interval.
void SetMemoryMarked(bool is_memory_marked_) {
|
||||||
|
is_memory_marked = is_memory_marked_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the flag last stored through SetMemoryMarked() (false by default).
bool IsMemoryMarked() const {
|
||||||
|
return is_memory_marked;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores the "sync pending" flag for this interval.
void SetSyncPending(bool is_sync_pending_) {
|
||||||
|
is_sync_pending = is_sync_pending_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the flag last stored through SetSyncPending() (false by default).
bool IsSyncPending() const {
|
||||||
|
return is_sync_pending;
|
||||||
|
}
|
||||||
|
|
||||||
VAddr GetStart() const {
|
VAddr GetStart() const {
|
||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
@ -83,6 +99,8 @@ private:
|
||||||
bool is_written{};
|
bool is_written{};
|
||||||
bool is_modified{};
|
bool is_modified{};
|
||||||
bool is_registered{};
|
bool is_registered{};
|
||||||
|
bool is_memory_marked{};
|
||||||
|
bool is_sync_pending{};
|
||||||
u64 ticks{};
|
u64 ticks{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128,
|
||||||
void DmaPusher::DispatchCalls() {
|
void DmaPusher::DispatchCalls() {
|
||||||
MICROPROFILE_SCOPE(DispatchCalls);
|
MICROPROFILE_SCOPE(DispatchCalls);
|
||||||
|
|
||||||
|
gpu.SyncGuestHost();
|
||||||
// On entering GPU code, assume all memory may be touched by the ARM core.
|
// On entering GPU code, assume all memory may be touched by the ARM core.
|
||||||
gpu.Maxwell3D().OnMemoryWrite();
|
gpu.Maxwell3D().OnMemoryWrite();
|
||||||
|
|
||||||
|
@ -32,6 +33,8 @@ void DmaPusher::DispatchCalls() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gpu.FlushCommands();
|
gpu.FlushCommands();
|
||||||
|
gpu.SyncGuestHost();
|
||||||
|
gpu.OnCommandListEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DmaPusher::Step() {
|
bool DmaPusher::Step() {
|
||||||
|
|
|
@ -404,7 +404,11 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
|
|
||||||
switch (regs.query.query_get.operation) {
|
switch (regs.query.query_get.operation) {
|
||||||
case Regs::QueryOperation::Release:
|
case Regs::QueryOperation::Release:
|
||||||
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
if (regs.query.query_get.fence == 1) {
|
||||||
|
rasterizer.SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence);
|
||||||
|
} else {
|
||||||
|
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case Regs::QueryOperation::Acquire:
|
case Regs::QueryOperation::Acquire:
|
||||||
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
|
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
|
||||||
|
@ -483,7 +487,7 @@ void Maxwell3D::ProcessSyncPoint() {
|
||||||
const u32 increment = regs.sync_info.increment.Value();
|
const u32 increment = regs.sync_info.increment.Value();
|
||||||
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
|
[[maybe_unused]] const u32 cache_flush = regs.sync_info.unknown.Value();
|
||||||
if (increment) {
|
if (increment) {
|
||||||
system.GPU().IncrementSyncPoint(sync_point);
|
rasterizer.SignalSyncPoint(sync_point);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -104,8 +104,13 @@ void MaxwellDMA::HandleCopy() {
|
||||||
write_buffer.resize(dst_size);
|
write_buffer.resize(dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
||||||
|
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
||||||
|
} else {
|
||||||
|
memory_manager.ReadBlockUnsafe(source, read_buffer.data(), src_size);
|
||||||
|
memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size);
|
||||||
|
}
|
||||||
|
|
||||||
Texture::UnswizzleSubrect(
|
Texture::UnswizzleSubrect(
|
||||||
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
|
regs.x_count, regs.y_count, regs.dst_pitch, regs.src_params.size_x, bytes_per_pixel,
|
||||||
|
@ -136,7 +141,7 @@ void MaxwellDMA::HandleCopy() {
|
||||||
write_buffer.resize(dst_size);
|
write_buffer.resize(dst_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
memory_manager.ReadBlock(source, read_buffer.data(), src_size);
|
||||||
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
memory_manager.ReadBlock(dest, write_buffer.data(), dst_size);
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -0,0 +1,170 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <queue>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "core/memory.h"
|
||||||
|
#include "core/settings.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
|
||||||
|
class FenceBase {
|
||||||
|
public:
|
||||||
|
FenceBase(u32 payload, bool is_stubbed)
|
||||||
|
: address{}, payload{payload}, is_semaphore{false}, is_stubbed{is_stubbed} {}
|
||||||
|
|
||||||
|
FenceBase(GPUVAddr address, u32 payload, bool is_stubbed)
|
||||||
|
: address{address}, payload{payload}, is_semaphore{true}, is_stubbed{is_stubbed} {}
|
||||||
|
|
||||||
|
GPUVAddr GetAddress() const {
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 GetPayload() const {
|
||||||
|
return payload;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsSemaphore() const {
|
||||||
|
return is_semaphore;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
GPUVAddr address;
|
||||||
|
u32 payload;
|
||||||
|
bool is_semaphore;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
bool is_stubbed;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename TFence, typename TTextureCache, typename TTBufferCache, typename TQueryCache>
|
||||||
|
class FenceManager {
|
||||||
|
public:
|
||||||
|
void SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
|
TryReleasePendingFences();
|
||||||
|
const bool should_flush = ShouldFlush();
|
||||||
|
CommitAsyncFlushes();
|
||||||
|
TFence new_fence = CreateFence(addr, value, !should_flush);
|
||||||
|
fences.push(new_fence);
|
||||||
|
QueueFence(new_fence);
|
||||||
|
if (should_flush) {
|
||||||
|
rasterizer.FlushCommands();
|
||||||
|
}
|
||||||
|
rasterizer.SyncGuestHost();
|
||||||
|
}
|
||||||
|
|
||||||
|
void SignalSyncPoint(u32 value) {
|
||||||
|
TryReleasePendingFences();
|
||||||
|
const bool should_flush = ShouldFlush();
|
||||||
|
CommitAsyncFlushes();
|
||||||
|
TFence new_fence = CreateFence(value, !should_flush);
|
||||||
|
fences.push(new_fence);
|
||||||
|
QueueFence(new_fence);
|
||||||
|
if (should_flush) {
|
||||||
|
rasterizer.FlushCommands();
|
||||||
|
}
|
||||||
|
rasterizer.SyncGuestHost();
|
||||||
|
}
|
||||||
|
|
||||||
|
void WaitPendingFences() {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
|
while (!fences.empty()) {
|
||||||
|
TFence& current_fence = fences.front();
|
||||||
|
if (ShouldWait()) {
|
||||||
|
WaitFence(current_fence);
|
||||||
|
}
|
||||||
|
PopAsyncFlushes();
|
||||||
|
if (current_fence->IsSemaphore()) {
|
||||||
|
memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
|
||||||
|
} else {
|
||||||
|
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||||
|
}
|
||||||
|
fences.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
FenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
TTextureCache& texture_cache, TTBufferCache& buffer_cache,
|
||||||
|
TQueryCache& query_cache)
|
||||||
|
: system{system}, rasterizer{rasterizer}, texture_cache{texture_cache},
|
||||||
|
buffer_cache{buffer_cache}, query_cache{query_cache} {}
|
||||||
|
|
||||||
|
virtual ~FenceManager() {}
|
||||||
|
|
||||||
|
/// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
|
||||||
|
/// true
|
||||||
|
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
|
||||||
|
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
|
||||||
|
virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
|
||||||
|
/// Queues a fence into the backend if the fence isn't stubbed.
|
||||||
|
virtual void QueueFence(TFence& fence) = 0;
|
||||||
|
/// Notifies that the backend fence has been signaled/reached in host GPU.
|
||||||
|
virtual bool IsFenceSignaled(TFence& fence) const = 0;
|
||||||
|
/// Waits until a fence has been signalled by the host GPU.
|
||||||
|
virtual void WaitFence(TFence& fence) = 0;
|
||||||
|
|
||||||
|
Core::System& system;
|
||||||
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
TTextureCache& texture_cache;
|
||||||
|
TTBufferCache& buffer_cache;
|
||||||
|
TQueryCache& query_cache;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void TryReleasePendingFences() {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
|
while (!fences.empty()) {
|
||||||
|
TFence& current_fence = fences.front();
|
||||||
|
if (ShouldWait() && !IsFenceSignaled(current_fence)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
PopAsyncFlushes();
|
||||||
|
if (current_fence->IsSemaphore()) {
|
||||||
|
memory_manager.Write<u32>(current_fence->GetAddress(), current_fence->GetPayload());
|
||||||
|
} else {
|
||||||
|
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||||
|
}
|
||||||
|
fences.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ShouldWait() const {
|
||||||
|
return texture_cache.ShouldWaitAsyncFlushes() || buffer_cache.ShouldWaitAsyncFlushes() ||
|
||||||
|
query_cache.ShouldWaitAsyncFlushes();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ShouldFlush() const {
|
||||||
|
return texture_cache.HasUncommittedFlushes() || buffer_cache.HasUncommittedFlushes() ||
|
||||||
|
query_cache.HasUncommittedFlushes();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PopAsyncFlushes() {
|
||||||
|
texture_cache.PopAsyncFlushes();
|
||||||
|
buffer_cache.PopAsyncFlushes();
|
||||||
|
query_cache.PopAsyncFlushes();
|
||||||
|
}
|
||||||
|
|
||||||
|
void CommitAsyncFlushes() {
|
||||||
|
texture_cache.CommitAsyncFlushes();
|
||||||
|
buffer_cache.CommitAsyncFlushes();
|
||||||
|
query_cache.CommitAsyncFlushes();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::queue<TFence> fences;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCommon
|
|
@ -125,6 +125,28 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Queues a flush of [addr, addr + size) to be carried out by TickWork().
/// @return The fence id assigned to the request; CurrentFlushRequestFence() reports this value
///         after TickWork() has processed the request.
u64 GPU::RequestFlush(VAddr addr, std::size_t size) {
    std::unique_lock lock{flush_request_mutex};
    const u64 fence_id = last_flush_fence + 1;
    last_flush_fence = fence_id;
    flush_requests.emplace_back(fence_id, addr, size);
    return fence_id;
}
|
||||||
|
|
||||||
|
void GPU::TickWork() {
|
||||||
|
std::unique_lock lck{flush_request_mutex};
|
||||||
|
while (!flush_requests.empty()) {
|
||||||
|
auto& request = flush_requests.front();
|
||||||
|
const u64 fence = request.fence;
|
||||||
|
const VAddr addr = request.addr;
|
||||||
|
const std::size_t size = request.size;
|
||||||
|
flush_requests.pop_front();
|
||||||
|
flush_request_mutex.unlock();
|
||||||
|
renderer->Rasterizer().FlushRegion(addr, size);
|
||||||
|
current_flush_fence.store(fence);
|
||||||
|
flush_request_mutex.lock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
u64 GPU::GetTicks() const {
|
u64 GPU::GetTicks() const {
|
||||||
// These values were reverse engineered by fincs from NVN
|
// These values were reverse engineered by fincs from NVN
|
||||||
// The gpu clock is reported in units of 385/625 nanoseconds
|
// The gpu clock is reported in units of 385/625 nanoseconds
|
||||||
|
@ -142,6 +164,13 @@ void GPU::FlushCommands() {
|
||||||
renderer->Rasterizer().FlushCommands();
|
renderer->Rasterizer().FlushCommands();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards the guest/host memory synchronization request to the renderer's rasterizer.
void GPU::SyncGuestHost() {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.SyncGuestHost();
}
|
||||||
|
|
||||||
|
/// Called when a command list has finished dispatching; asks the rasterizer to release its
/// fences.
void GPU::OnCommandListEnd() {
    auto& rasterizer = renderer->Rasterizer();
    rasterizer.ReleaseFences();
}
|
||||||
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
|
||||||
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
|
||||||
// So the values you see in docs might be multiplied by 4.
|
// So the values you see in docs might be multiplied by 4.
|
||||||
|
|
|
@ -155,7 +155,23 @@ public:
|
||||||
/// Calls a GPU method.
|
/// Calls a GPU method.
|
||||||
void CallMethod(const MethodCall& method_call);
|
void CallMethod(const MethodCall& method_call);
|
||||||
|
|
||||||
|
/// Flush all current written commands into the host GPU for execution.
|
||||||
void FlushCommands();
|
void FlushCommands();
|
||||||
|
/// Synchronizes CPU writes with Host GPU memory.
|
||||||
|
void SyncGuestHost();
|
||||||
|
/// Signal the ending of command list.
|
||||||
|
virtual void OnCommandListEnd();
|
||||||
|
|
||||||
|
/// Request a host GPU memory flush from the CPU.
|
||||||
|
u64 RequestFlush(VAddr addr, std::size_t size);
|
||||||
|
|
||||||
|
/// Obtains the current flush request fence id, i.e. the value most recently stored in
/// current_flush_fence. The value is read with relaxed memory ordering.
|
||||||
|
u64 CurrentFlushRequestFence() const {
|
||||||
|
return current_flush_fence.load(std::memory_order_relaxed);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tick pending requests within the GPU.
|
||||||
|
void TickWork();
|
||||||
|
|
||||||
/// Returns a reference to the Maxwell3D GPU engine.
|
/// Returns a reference to the Maxwell3D GPU engine.
|
||||||
Engines::Maxwell3D& Maxwell3D();
|
Engines::Maxwell3D& Maxwell3D();
|
||||||
|
@ -325,6 +341,19 @@ private:
|
||||||
|
|
||||||
std::condition_variable sync_cv;
|
std::condition_variable sync_cv;
|
||||||
|
|
||||||
|
struct FlushRequest {
|
||||||
|
FlushRequest(u64 fence, VAddr addr, std::size_t size)
|
||||||
|
: fence{fence}, addr{addr}, size{size} {}
|
||||||
|
u64 fence;
|
||||||
|
VAddr addr;
|
||||||
|
std::size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::list<FlushRequest> flush_requests;
|
||||||
|
std::atomic<u64> current_flush_fence{};
|
||||||
|
u64 last_flush_fence{};
|
||||||
|
std::mutex flush_request_mutex;
|
||||||
|
|
||||||
const bool is_async;
|
const bool is_async;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const {
|
||||||
gpu_thread.WaitIdle();
|
gpu_thread.WaitIdle();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Asynchronous override of GPU::OnCommandListEnd: instead of acting directly, the
/// notification is forwarded to the GPU thread object.
void GPUAsynch::OnCommandListEnd() {
|
||||||
|
gpu_thread.OnCommandListEnd();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace VideoCommon
|
} // namespace VideoCommon
|
||||||
|
|
|
@ -32,6 +32,8 @@ public:
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void WaitIdle() const override;
|
void WaitIdle() const override;
|
||||||
|
|
||||||
|
void OnCommandListEnd() override;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/frontend/emu_window.h"
|
#include "core/frontend/emu_window.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/dma_pusher.h"
|
#include "video_core/dma_pusher.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/gpu_thread.h"
|
#include "video_core/gpu_thread.h"
|
||||||
|
@ -14,8 +15,9 @@
|
||||||
namespace VideoCommon::GPUThread {
|
namespace VideoCommon::GPUThread {
|
||||||
|
|
||||||
/// Runs the GPU thread
|
/// Runs the GPU thread
|
||||||
static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
|
static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
|
||||||
Tegra::DmaPusher& dma_pusher, SynchState& state) {
|
Core::Frontend::GraphicsContext& context, Tegra::DmaPusher& dma_pusher,
|
||||||
|
SynchState& state) {
|
||||||
MicroProfileOnThreadCreate("GpuThread");
|
MicroProfileOnThreadCreate("GpuThread");
|
||||||
|
|
||||||
// Wait for first GPU command before acquiring the window context
|
// Wait for first GPU command before acquiring the window context
|
||||||
|
@ -37,10 +39,14 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic
|
||||||
dma_pusher.DispatchCalls();
|
dma_pusher.DispatchCalls();
|
||||||
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||||
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
||||||
|
} else if (const auto data = std::get_if<OnCommandListEndCommand>(&next.data)) {
|
||||||
|
renderer.Rasterizer().ReleaseFences();
|
||||||
|
} else if (const auto data = std::get_if<GPUTickCommand>(&next.data)) {
|
||||||
|
system.GPU().TickWork();
|
||||||
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) {
|
||||||
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
renderer.Rasterizer().FlushRegion(data->addr, data->size);
|
||||||
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
} else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) {
|
||||||
renderer.Rasterizer().InvalidateRegion(data->addr, data->size);
|
renderer.Rasterizer().OnCPUWrite(data->addr, data->size);
|
||||||
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
} else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
|
@ -65,8 +71,8 @@ ThreadManager::~ThreadManager() {
|
||||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||||
Core::Frontend::GraphicsContext& context,
|
Core::Frontend::GraphicsContext& context,
|
||||||
Tegra::DmaPusher& dma_pusher) {
|
Tegra::DmaPusher& dma_pusher) {
|
||||||
thread = std::thread{RunThread, std::ref(renderer), std::ref(context), std::ref(dma_pusher),
|
thread = std::thread{RunThread, std::ref(system), std::ref(renderer),
|
||||||
std::ref(state)};
|
std::ref(context), std::ref(dma_pusher), std::ref(state)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||||
|
@ -78,16 +84,29 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
|
||||||
PushCommand(FlushRegionCommand(addr, size));
|
if (!Settings::IsGPULevelHigh()) {
|
||||||
|
PushCommand(FlushRegionCommand(addr, size));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!Settings::IsGPULevelExtreme()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (system.Renderer().Rasterizer().MustFlushRegion(addr, size)) {
|
||||||
|
auto& gpu = system.GPU();
|
||||||
|
u64 fence = gpu.RequestFlush(addr, size);
|
||||||
|
PushCommand(GPUTickCommand());
|
||||||
|
while (fence > gpu.CurrentFlushRequestFence()) {
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
system.Renderer().Rasterizer().InvalidateRegion(addr, size);
|
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
|
// Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
|
||||||
InvalidateRegion(addr, size);
|
system.Renderer().Rasterizer().OnCPUWrite(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ThreadManager::WaitIdle() const {
|
void ThreadManager::WaitIdle() const {
|
||||||
|
@ -95,6 +114,10 @@ void ThreadManager::WaitIdle() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ThreadManager::OnCommandListEnd() {
|
||||||
|
PushCommand(OnCommandListEndCommand());
|
||||||
|
}
|
||||||
|
|
||||||
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
u64 ThreadManager::PushCommand(CommandData&& command_data) {
|
||||||
const u64 fence{++state.last_fence};
|
const u64 fence{++state.last_fence};
|
||||||
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
state.queue.Push(CommandDataContainer(std::move(command_data), fence));
|
||||||
|
|
|
@ -70,9 +70,16 @@ struct FlushAndInvalidateRegionCommand final {
|
||||||
u64 size;
|
u64 size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Command issued within the GPU to schedule actions after a command list ends
|
||||||
|
struct OnCommandListEndCommand final {};
|
||||||
|
|
||||||
|
/// Command to make the gpu look into pending requests
|
||||||
|
struct GPUTickCommand final {};
|
||||||
|
|
||||||
using CommandData =
|
using CommandData =
|
||||||
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
|
||||||
InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
|
InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand,
|
||||||
|
GPUTickCommand>;
|
||||||
|
|
||||||
struct CommandDataContainer {
|
struct CommandDataContainer {
|
||||||
CommandDataContainer() = default;
|
CommandDataContainer() = default;
|
||||||
|
@ -122,6 +129,8 @@ public:
|
||||||
// Wait until the gpu thread is idle.
|
// Wait until the gpu thread is idle.
|
||||||
void WaitIdle() const;
|
void WaitIdle() const;
|
||||||
|
|
||||||
|
void OnCommandListEnd();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Pushes a command to be executed by the GPU thread
|
/// Pushes a command to be executed by the GPU thread
|
||||||
u64 PushCommand(CommandData&& command_data);
|
u64 PushCommand(CommandData&& command_data);
|
||||||
|
|
|
@ -12,10 +12,12 @@
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
@ -130,6 +132,9 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
query->BindCounter(Stream(type).Current(), timestamp);
|
query->BindCounter(Stream(type).Current(), timestamp);
|
||||||
|
if (Settings::values.use_asynchronous_gpu_emulation) {
|
||||||
|
AsyncFlushQuery(cpu_addr);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||||
|
@ -170,6 +175,37 @@ public:
|
||||||
return streams[static_cast<std::size_t>(type)];
|
return streams[static_cast<std::size_t>(type)];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CommitAsyncFlushes() {
|
||||||
|
committed_flushes.push_back(uncommitted_flushes);
|
||||||
|
uncommitted_flushes.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool HasUncommittedFlushes() const {
|
||||||
|
return uncommitted_flushes != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ShouldWaitAsyncFlushes() const {
|
||||||
|
if (committed_flushes.empty()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return committed_flushes.front() != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PopAsyncFlushes() {
|
||||||
|
if (committed_flushes.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto& flush_list = committed_flushes.front();
|
||||||
|
if (!flush_list) {
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (VAddr query_address : *flush_list) {
|
||||||
|
FlushAndRemoveRegion(query_address, 4);
|
||||||
|
}
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||||
|
|
||||||
|
@ -224,6 +260,13 @@ private:
|
||||||
return found != std::end(contents) ? &*found : nullptr;
|
return found != std::end(contents) ? &*found : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AsyncFlushQuery(VAddr addr) {
|
||||||
|
if (!uncommitted_flushes) {
|
||||||
|
uncommitted_flushes = std::make_shared<std::unordered_set<VAddr>>();
|
||||||
|
}
|
||||||
|
uncommitted_flushes->insert(addr);
|
||||||
|
}
|
||||||
|
|
||||||
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||||
static constexpr unsigned PAGE_SHIFT = 12;
|
static constexpr unsigned PAGE_SHIFT = 12;
|
||||||
|
|
||||||
|
@ -235,6 +278,9 @@ private:
|
||||||
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||||
|
|
||||||
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||||
|
|
||||||
|
std::shared_ptr<std::unordered_set<VAddr>> uncommitted_flushes{};
|
||||||
|
std::list<std::shared_ptr<std::unordered_set<VAddr>>> committed_flushes;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class QueryCache, class HostCounter>
|
template <class QueryCache, class HostCounter>
|
||||||
|
|
|
@ -49,15 +49,33 @@ public:
|
||||||
/// Records a GPU query and caches it
|
/// Records a GPU query and caches it
|
||||||
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
||||||
|
|
||||||
|
/// Signal a GPU based semaphore as a fence
|
||||||
|
virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0;
|
||||||
|
|
||||||
|
/// Signal a GPU based syncpoint as a fence
|
||||||
|
virtual void SignalSyncPoint(u32 value) = 0;
|
||||||
|
|
||||||
|
/// Release all pending fences.
|
||||||
|
virtual void ReleaseFences() = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushRegion(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
/// Check if the specified memory area requires flushing to CPU Memory.
|
||||||
|
virtual bool MustFlushRegion(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be invalidated
|
/// Notify rasterizer that any caches of the specified region should be invalidated
|
||||||
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void InvalidateRegion(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
/// Notify rasterizer that any caches of the specified region are out of sync with the guest
|
||||||
|
virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
|
||||||
|
|
||||||
|
/// Sync memory between guest and host.
|
||||||
|
virtual void SyncGuestHost() = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
|
||||||
/// and invalidated
|
/// and invalidated
|
||||||
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0;
|
||||||
|
|
|
@ -52,7 +52,7 @@ Buffer OGLBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void OGLBufferCache::WriteBarrier() {
|
void OGLBufferCache::WriteBarrier() {
|
||||||
glMemoryBarrier(GL_ALL_BARRIER_BITS);
|
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
|
GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
|
||||||
|
@ -72,6 +72,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s
|
||||||
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
|
||||||
u8* data) {
|
u8* data) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
|
||||||
|
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||||
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
|
||||||
static_cast<GLsizeiptr>(size), data);
|
static_cast<GLsizeiptr>(size), data);
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
|
||||||
|
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
GLInnerFence::GLInnerFence(u32 payload, bool is_stubbed)
|
||||||
|
: VideoCommon::FenceBase(payload, is_stubbed), sync_object{} {}
|
||||||
|
|
||||||
|
GLInnerFence::GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed)
|
||||||
|
: VideoCommon::FenceBase(address, payload, is_stubbed), sync_object{} {}
|
||||||
|
|
||||||
|
GLInnerFence::~GLInnerFence() = default;
|
||||||
|
|
||||||
|
void GLInnerFence::Queue() {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(sync_object.handle == 0);
|
||||||
|
sync_object.Create();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool GLInnerFence::IsSignaled() const {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
ASSERT(sync_object.handle != 0);
|
||||||
|
GLsizei length;
|
||||||
|
GLint sync_status;
|
||||||
|
glGetSynciv(sync_object.handle, GL_SYNC_STATUS, sizeof(GLint), &length, &sync_status);
|
||||||
|
return sync_status == GL_SIGNALED;
|
||||||
|
}
|
||||||
|
|
||||||
|
void GLInnerFence::Wait() {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(sync_object.handle != 0);
|
||||||
|
glClientWaitSync(sync_object.handle, 0, GL_TIMEOUT_IGNORED);
|
||||||
|
}
|
||||||
|
|
||||||
|
FenceManagerOpenGL::FenceManagerOpenGL(Core::System& system,
|
||||||
|
VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
TextureCacheOpenGL& texture_cache,
|
||||||
|
OGLBufferCache& buffer_cache, QueryCache& query_cache)
|
||||||
|
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache) {}
|
||||||
|
|
||||||
|
Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
|
||||||
|
return std::make_shared<GLInnerFence>(value, is_stubbed);
|
||||||
|
}
|
||||||
|
|
||||||
|
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
|
||||||
|
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FenceManagerOpenGL::QueueFence(Fence& fence) {
|
||||||
|
fence->Queue();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool FenceManagerOpenGL::IsFenceSignaled(Fence& fence) const {
|
||||||
|
return fence->IsSignaled();
|
||||||
|
}
|
||||||
|
|
||||||
|
void FenceManagerOpenGL::WaitFence(Fence& fence) {
|
||||||
|
fence->Wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -0,0 +1,53 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/fence_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class GLInnerFence : public VideoCommon::FenceBase {
|
||||||
|
public:
|
||||||
|
GLInnerFence(u32 payload, bool is_stubbed);
|
||||||
|
GLInnerFence(GPUVAddr address, u32 payload, bool is_stubbed);
|
||||||
|
~GLInnerFence();
|
||||||
|
|
||||||
|
void Queue();
|
||||||
|
|
||||||
|
bool IsSignaled() const;
|
||||||
|
|
||||||
|
void Wait();
|
||||||
|
|
||||||
|
private:
|
||||||
|
OGLSync sync_object;
|
||||||
|
};
|
||||||
|
|
||||||
|
using Fence = std::shared_ptr<GLInnerFence>;
|
||||||
|
using GenericFenceManager =
|
||||||
|
VideoCommon::FenceManager<Fence, TextureCacheOpenGL, OGLBufferCache, QueryCache>;
|
||||||
|
|
||||||
|
class FenceManagerOpenGL final : public GenericFenceManager {
|
||||||
|
public:
|
||||||
|
FenceManagerOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
TextureCacheOpenGL& texture_cache, OGLBufferCache& buffer_cache,
|
||||||
|
QueryCache& query_cache);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||||
|
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
|
||||||
|
void QueueFence(Fence& fence) override;
|
||||||
|
bool IsFenceSignaled(Fence& fence) const override;
|
||||||
|
void WaitFence(Fence& fence) override;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -99,9 +99,10 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
|
||||||
ScreenInfo& info, GLShader::ProgramManager& program_manager,
|
ScreenInfo& info, GLShader::ProgramManager& program_manager,
|
||||||
StateTracker& state_tracker)
|
StateTracker& state_tracker)
|
||||||
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
|
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
|
||||||
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
|
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
|
||||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
|
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
|
||||||
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
|
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
|
||||||
|
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||||
CheckExtensions();
|
CheckExtensions();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -599,6 +600,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
EndTransformFeedback();
|
EndTransformFeedback();
|
||||||
|
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
|
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
|
@ -649,6 +652,13 @@ void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {
|
||||||
query_cache.FlushRegion(addr, size);
|
query_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RasterizerOpenGL::MustFlushRegion(VAddr addr, u64 size) {
|
||||||
|
if (!Settings::IsGPULevelHigh()) {
|
||||||
|
return buffer_cache.MustFlushRegion(addr, size);
|
||||||
|
}
|
||||||
|
return texture_cache.MustFlushRegion(addr, size) || buffer_cache.MustFlushRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
|
@ -660,8 +670,52 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
query_cache.InvalidateRegion(addr, size);
|
query_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
if (addr == 0 || size == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
texture_cache.OnCPUWrite(addr, size);
|
||||||
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
|
buffer_cache.OnCPUWrite(addr, size);
|
||||||
|
query_cache.InvalidateRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SyncGuestHost() {
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_CacheManagement);
|
||||||
|
texture_cache.SyncGuestHost();
|
||||||
|
buffer_cache.SyncGuestHost();
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
if (!gpu.IsAsync()) {
|
||||||
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
|
memory_manager.Write<u32>(addr, value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fence_manager.SignalSemaphore(addr, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
if (!gpu.IsAsync()) {
|
||||||
|
gpu.IncrementSyncPoint(value);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fence_manager.SignalSyncPoint(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::ReleaseFences() {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
if (!gpu.IsAsync()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fence_manager.WaitPendingFences();
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
FlushRegion(addr, size);
|
FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
InvalidateRegion(addr, size);
|
InvalidateRegion(addr, size);
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "video_core/rasterizer_interface.h"
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_fence_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
@ -66,7 +67,13 @@ public:
|
||||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(VAddr addr, u64 size) override;
|
||||||
|
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||||
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
|
void SyncGuestHost() override;
|
||||||
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
|
void SignalSyncPoint(u32 value) override;
|
||||||
|
void ReleaseFences() override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void FlushCommands() override;
|
void FlushCommands() override;
|
||||||
void TickFrame() override;
|
void TickFrame() override;
|
||||||
|
@ -222,6 +229,8 @@ private:
|
||||||
SamplerCacheOpenGL sampler_cache;
|
SamplerCacheOpenGL sampler_cache;
|
||||||
FramebufferCacheOpenGL framebuffer_cache;
|
FramebufferCacheOpenGL framebuffer_cache;
|
||||||
QueryCache query_cache;
|
QueryCache query_cache;
|
||||||
|
OGLBufferCache buffer_cache;
|
||||||
|
FenceManagerOpenGL fence_manager;
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
ScreenInfo& screen_info;
|
ScreenInfo& screen_info;
|
||||||
|
@ -229,7 +238,6 @@ private:
|
||||||
StateTracker& state_tracker;
|
StateTracker& state_tracker;
|
||||||
|
|
||||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
OGLBufferCache buffer_cache;
|
|
||||||
|
|
||||||
GLint vertex_binding = 0;
|
GLint vertex_binding = 0;
|
||||||
|
|
||||||
|
|
|
@ -448,7 +448,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
|
|
||||||
// Look up shader in the cache based on address
|
// Look up shader in the cache based on address
|
||||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
const auto cpu_addr{memory_manager.GpuToCpuAddress(address)};
|
||||||
Shader shader{cpu_addr ? TryGet(*cpu_addr) : nullptr};
|
Shader shader{cpu_addr ? TryGet(*cpu_addr) : null_shader};
|
||||||
if (shader) {
|
if (shader) {
|
||||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||||
}
|
}
|
||||||
|
@ -477,7 +477,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||||
}
|
}
|
||||||
Register(shader);
|
|
||||||
|
if (cpu_addr) {
|
||||||
|
Register(shader);
|
||||||
|
} else {
|
||||||
|
null_shader = shader;
|
||||||
|
}
|
||||||
|
|
||||||
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
return last_shaders[static_cast<std::size_t>(program)] = shader;
|
||||||
}
|
}
|
||||||
|
@ -486,7 +491,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||||
auto& memory_manager{system.GPU().MemoryManager()};
|
auto& memory_manager{system.GPU().MemoryManager()};
|
||||||
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
const auto cpu_addr{memory_manager.GpuToCpuAddress(code_addr)};
|
||||||
|
|
||||||
auto kernel = cpu_addr ? TryGet(*cpu_addr) : nullptr;
|
auto kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||||
if (kernel) {
|
if (kernel) {
|
||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
@ -507,7 +512,11 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
|
||||||
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
Register(kernel);
|
if (cpu_addr) {
|
||||||
|
Register(kernel);
|
||||||
|
} else {
|
||||||
|
null_kernel = kernel;
|
||||||
|
}
|
||||||
return kernel;
|
return kernel;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -125,6 +125,9 @@ private:
|
||||||
ShaderDiskCacheOpenGL disk_cache;
|
ShaderDiskCacheOpenGL disk_cache;
|
||||||
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
std::unordered_map<u64, PrecompiledShader> runtime_cache;
|
||||||
|
|
||||||
|
Shader null_shader{};
|
||||||
|
Shader null_kernel{};
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload, bool is_stubbed)
|
||||||
|
: VideoCommon::FenceBase(payload, is_stubbed), device{device}, scheduler{scheduler} {}
|
||||||
|
|
||||||
|
InnerFence::InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
|
||||||
|
u32 payload, bool is_stubbed)
|
||||||
|
: VideoCommon::FenceBase(address, payload, is_stubbed), device{device}, scheduler{scheduler} {}
|
||||||
|
|
||||||
|
InnerFence::~InnerFence() = default;
|
||||||
|
|
||||||
|
void InnerFence::Queue() {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(!event);
|
||||||
|
|
||||||
|
event = device.GetLogical().CreateEvent();
|
||||||
|
ticks = scheduler.Ticks();
|
||||||
|
|
||||||
|
scheduler.RequestOutsideRenderPassOperationContext();
|
||||||
|
scheduler.Record([event = *event](vk::CommandBuffer cmdbuf) {
|
||||||
|
cmdbuf.SetEvent(event, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InnerFence::IsSignaled() const {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
ASSERT(event);
|
||||||
|
return IsEventSignalled();
|
||||||
|
}
|
||||||
|
|
||||||
|
void InnerFence::Wait() {
|
||||||
|
if (is_stubbed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
ASSERT(event);
|
||||||
|
|
||||||
|
if (ticks >= scheduler.Ticks()) {
|
||||||
|
scheduler.Flush();
|
||||||
|
}
|
||||||
|
while (!IsEventSignalled()) {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool InnerFence::IsEventSignalled() const {
|
||||||
|
switch (const VkResult result = event.GetStatus()) {
|
||||||
|
case VK_EVENT_SET:
|
||||||
|
return true;
|
||||||
|
case VK_EVENT_RESET:
|
||||||
|
return false;
|
||||||
|
default:
|
||||||
|
throw vk::Exception(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
VKFenceManager::VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const VKDevice& device, VKScheduler& scheduler,
|
||||||
|
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
|
||||||
|
VKQueryCache& query_cache)
|
||||||
|
: GenericFenceManager(system, rasterizer, texture_cache, buffer_cache, query_cache),
|
||||||
|
device{device}, scheduler{scheduler} {}
|
||||||
|
|
||||||
|
Fence VKFenceManager::CreateFence(u32 value, bool is_stubbed) {
|
||||||
|
return std::make_shared<InnerFence>(device, scheduler, value, is_stubbed);
|
||||||
|
}
|
||||||
|
|
||||||
|
Fence VKFenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
|
||||||
|
return std::make_shared<InnerFence>(device, scheduler, addr, value, is_stubbed);
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKFenceManager::QueueFence(Fence& fence) {
|
||||||
|
fence->Queue();
|
||||||
|
}
|
||||||
|
|
||||||
|
bool VKFenceManager::IsFenceSignaled(Fence& fence) const {
|
||||||
|
return fence->IsSignaled();
|
||||||
|
}
|
||||||
|
|
||||||
|
void VKFenceManager::WaitFence(Fence& fence) {
|
||||||
|
fence->Wait();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -0,0 +1,74 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "video_core/fence_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/wrapper.h"
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
class System;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
class RasterizerInterface;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
class VKBufferCache;
|
||||||
|
class VKDevice;
|
||||||
|
class VKQueryCache;
|
||||||
|
class VKScheduler;
|
||||||
|
class VKTextureCache;
|
||||||
|
|
||||||
|
class InnerFence : public VideoCommon::FenceBase {
|
||||||
|
public:
|
||||||
|
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, u32 payload,
|
||||||
|
bool is_stubbed);
|
||||||
|
explicit InnerFence(const VKDevice& device, VKScheduler& scheduler, GPUVAddr address,
|
||||||
|
u32 payload, bool is_stubbed);
|
||||||
|
~InnerFence();
|
||||||
|
|
||||||
|
void Queue();
|
||||||
|
|
||||||
|
bool IsSignaled() const;
|
||||||
|
|
||||||
|
void Wait();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool IsEventSignalled() const;
|
||||||
|
|
||||||
|
const VKDevice& device;
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
vk::Event event;
|
||||||
|
u64 ticks = 0;
|
||||||
|
};
|
||||||
|
using Fence = std::shared_ptr<InnerFence>;
|
||||||
|
|
||||||
|
using GenericFenceManager =
|
||||||
|
VideoCommon::FenceManager<Fence, VKTextureCache, VKBufferCache, VKQueryCache>;
|
||||||
|
|
||||||
|
class VKFenceManager final : public GenericFenceManager {
|
||||||
|
public:
|
||||||
|
explicit VKFenceManager(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const VKDevice& device, VKScheduler& scheduler,
|
||||||
|
VKTextureCache& texture_cache, VKBufferCache& buffer_cache,
|
||||||
|
VKQueryCache& query_cache);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||||
|
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
|
||||||
|
void QueueFence(Fence& fence) override;
|
||||||
|
bool IsFenceSignaled(Fence& fence) const override;
|
||||||
|
void WaitFence(Fence& fence) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
const VKDevice& device;
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -207,7 +207,7 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||||
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
const GPUVAddr program_addr{GetShaderAddress(system, program)};
|
||||||
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
const std::optional cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
|
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_shader;
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
const auto host_ptr{memory_manager.GetPointer(program_addr)};
|
||||||
|
|
||||||
|
@ -218,7 +218,11 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
|
||||||
|
|
||||||
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
|
shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
|
||||||
std::move(code), stage_offset);
|
std::move(code), stage_offset);
|
||||||
Register(shader);
|
if (cpu_addr) {
|
||||||
|
Register(shader);
|
||||||
|
} else {
|
||||||
|
null_shader = shader;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
shaders[index] = std::move(shader);
|
shaders[index] = std::move(shader);
|
||||||
}
|
}
|
||||||
|
@ -261,7 +265,7 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
const auto cpu_addr = memory_manager.GpuToCpuAddress(program_addr);
|
||||||
ASSERT(cpu_addr);
|
ASSERT(cpu_addr);
|
||||||
|
|
||||||
auto shader = cpu_addr ? TryGet(*cpu_addr) : nullptr;
|
auto shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel;
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
// No shader found - create a new one
|
// No shader found - create a new one
|
||||||
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
const auto host_ptr = memory_manager.GetPointer(program_addr);
|
||||||
|
@ -271,7 +275,11 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
|
||||||
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
|
||||||
program_addr, *cpu_addr, std::move(code),
|
program_addr, *cpu_addr, std::move(code),
|
||||||
kernel_main_offset);
|
kernel_main_offset);
|
||||||
Register(shader);
|
if (cpu_addr) {
|
||||||
|
Register(shader);
|
||||||
|
} else {
|
||||||
|
null_kernel = shader;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Specialization specialization;
|
Specialization specialization;
|
||||||
|
|
|
@ -182,6 +182,9 @@ private:
|
||||||
VKUpdateDescriptorQueue& update_descriptor_queue;
|
VKUpdateDescriptorQueue& update_descriptor_queue;
|
||||||
VKRenderPassCache& renderpass_cache;
|
VKRenderPassCache& renderpass_cache;
|
||||||
|
|
||||||
|
Shader null_shader{};
|
||||||
|
Shader null_kernel{};
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||||
|
|
||||||
GraphicsPipelineCacheKey last_graphics_key;
|
GraphicsPipelineCacheKey last_graphics_key;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/engines/kepler_compute.h"
|
#include "video_core/engines/kepler_compute.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
#include "video_core/renderer_vulkan/fixed_pipeline_state.h"
|
||||||
|
@ -299,7 +300,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
|
||||||
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
|
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue,
|
||||||
renderpass_cache),
|
renderpass_cache),
|
||||||
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
||||||
sampler_cache(device), query_cache(system, *this, device, scheduler) {
|
sampler_cache(device),
|
||||||
|
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
|
||||||
|
query_cache(system, *this, device, scheduler) {
|
||||||
scheduler.SetQueryCache(query_cache);
|
scheduler.SetQueryCache(query_cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -360,6 +363,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
});
|
});
|
||||||
|
|
||||||
EndTransformFeedback();
|
EndTransformFeedback();
|
||||||
|
|
||||||
|
system.GPU().TickWork();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::Clear() {
|
void RasterizerVulkan::Clear() {
|
||||||
|
@ -504,6 +509,13 @@ void RasterizerVulkan::FlushRegion(VAddr addr, u64 size) {
|
||||||
query_cache.FlushRegion(addr, size);
|
query_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether the region must be flushed. The texture cache is only consulted
// when the GPU accuracy level is High or above; the buffer cache is the fallback check.
bool RasterizerVulkan::MustFlushRegion(VAddr addr, u64 size) {
    if (Settings::IsGPULevelHigh() && texture_cache.MustFlushRegion(addr, size)) {
        return true;
    }
    return buffer_cache.MustFlushRegion(addr, size);
}
|
||||||
|
|
||||||
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
if (addr == 0 || size == 0) {
|
if (addr == 0 || size == 0) {
|
||||||
return;
|
return;
|
||||||
|
@ -514,6 +526,47 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size) {
|
||||||
query_cache.InvalidateRegion(addr, size);
|
query_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Notifies the caches of a CPU write to [addr, addr + size).
// A null address or an empty range is ignored.
void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
    const bool has_range = addr != 0 && size != 0;
    if (has_range) {
        texture_cache.OnCPUWrite(addr, size);
        pipeline_cache.InvalidateRegion(addr, size);
        buffer_cache.OnCPUWrite(addr, size);
        query_cache.InvalidateRegion(addr, size);
    }
}
|
||||||
|
|
||||||
|
// Forwards the guest/host synchronization request to the texture cache,
// then to the buffer cache.
void RasterizerVulkan::SyncGuestHost() {
    auto& textures = texture_cache;
    auto& buffers = buffer_cache;
    textures.SyncGuestHost();
    buffers.SyncGuestHost();
}
|
||||||
|
|
||||||
|
// Signals a semaphore: with an asynchronous GPU the request goes through the fence
// manager, otherwise the value is written directly to GPU memory.
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSemaphore(addr, value);
    } else {
        gpu.MemoryManager().Write<u32>(addr, value);
    }
}
|
||||||
|
|
||||||
|
// Signals a sync point: with an asynchronous GPU the request goes through the fence
// manager, otherwise the sync point is incremented immediately.
void RasterizerVulkan::SignalSyncPoint(u32 value) {
    auto& gpu = system.GPU();
    if (gpu.IsAsync()) {
        fence_manager.SignalSyncPoint(value);
    } else {
        gpu.IncrementSyncPoint(value);
    }
}
|
||||||
|
|
||||||
|
void RasterizerVulkan::ReleaseFences() {
|
||||||
|
auto& gpu{system.GPU()};
|
||||||
|
if (!gpu.IsAsync()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
fence_manager.WaitPendingFences();
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
void RasterizerVulkan::FlushAndInvalidateRegion(VAddr addr, u64 size) {
|
||||||
FlushRegion(addr, size);
|
FlushRegion(addr, size);
|
||||||
InvalidateRegion(addr, size);
|
InvalidateRegion(addr, size);
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
#include "video_core/renderer_vulkan/vk_compute_pass.h"
|
||||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_fence_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
|
@ -118,7 +119,13 @@ public:
|
||||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(VAddr addr, u64 size) override;
|
void FlushRegion(VAddr addr, u64 size) override;
|
||||||
|
bool MustFlushRegion(VAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(VAddr addr, u64 size) override;
|
void InvalidateRegion(VAddr addr, u64 size) override;
|
||||||
|
void OnCPUWrite(VAddr addr, u64 size) override;
|
||||||
|
void SyncGuestHost() override;
|
||||||
|
void SignalSemaphore(GPUVAddr addr, u32 value) override;
|
||||||
|
void SignalSyncPoint(u32 value) override;
|
||||||
|
void ReleaseFences() override;
|
||||||
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
void FlushAndInvalidateRegion(VAddr addr, u64 size) override;
|
||||||
void FlushCommands() override;
|
void FlushCommands() override;
|
||||||
void TickFrame() override;
|
void TickFrame() override;
|
||||||
|
@ -261,6 +268,7 @@ private:
|
||||||
VKPipelineCache pipeline_cache;
|
VKPipelineCache pipeline_cache;
|
||||||
VKBufferCache buffer_cache;
|
VKBufferCache buffer_cache;
|
||||||
VKSamplerCache sampler_cache;
|
VKSamplerCache sampler_cache;
|
||||||
|
VKFenceManager fence_manager;
|
||||||
VKQueryCache query_cache;
|
VKQueryCache query_cache;
|
||||||
|
|
||||||
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
||||||
|
|
|
@ -63,6 +63,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkCmdSetBlendConstants);
|
X(vkCmdSetBlendConstants);
|
||||||
X(vkCmdSetDepthBias);
|
X(vkCmdSetDepthBias);
|
||||||
X(vkCmdSetDepthBounds);
|
X(vkCmdSetDepthBounds);
|
||||||
|
X(vkCmdSetEvent);
|
||||||
X(vkCmdSetScissor);
|
X(vkCmdSetScissor);
|
||||||
X(vkCmdSetStencilCompareMask);
|
X(vkCmdSetStencilCompareMask);
|
||||||
X(vkCmdSetStencilReference);
|
X(vkCmdSetStencilReference);
|
||||||
|
@ -75,6 +76,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkCreateDescriptorPool);
|
X(vkCreateDescriptorPool);
|
||||||
X(vkCreateDescriptorSetLayout);
|
X(vkCreateDescriptorSetLayout);
|
||||||
X(vkCreateDescriptorUpdateTemplateKHR);
|
X(vkCreateDescriptorUpdateTemplateKHR);
|
||||||
|
X(vkCreateEvent);
|
||||||
X(vkCreateFence);
|
X(vkCreateFence);
|
||||||
X(vkCreateFramebuffer);
|
X(vkCreateFramebuffer);
|
||||||
X(vkCreateGraphicsPipelines);
|
X(vkCreateGraphicsPipelines);
|
||||||
|
@ -93,6 +95,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkDestroyDescriptorPool);
|
X(vkDestroyDescriptorPool);
|
||||||
X(vkDestroyDescriptorSetLayout);
|
X(vkDestroyDescriptorSetLayout);
|
||||||
X(vkDestroyDescriptorUpdateTemplateKHR);
|
X(vkDestroyDescriptorUpdateTemplateKHR);
|
||||||
|
X(vkDestroyEvent);
|
||||||
X(vkDestroyFence);
|
X(vkDestroyFence);
|
||||||
X(vkDestroyFramebuffer);
|
X(vkDestroyFramebuffer);
|
||||||
X(vkDestroyImage);
|
X(vkDestroyImage);
|
||||||
|
@ -112,6 +115,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
|
||||||
X(vkFreeMemory);
|
X(vkFreeMemory);
|
||||||
X(vkGetBufferMemoryRequirements);
|
X(vkGetBufferMemoryRequirements);
|
||||||
X(vkGetDeviceQueue);
|
X(vkGetDeviceQueue);
|
||||||
|
X(vkGetEventStatus);
|
||||||
X(vkGetFenceStatus);
|
X(vkGetFenceStatus);
|
||||||
X(vkGetImageMemoryRequirements);
|
X(vkGetImageMemoryRequirements);
|
||||||
X(vkGetQueryPoolResults);
|
X(vkGetQueryPoolResults);
|
||||||
|
@ -269,6 +273,10 @@ void Destroy(VkDevice device, VkDeviceMemory handle, const DeviceDispatch& dld)
|
||||||
dld.vkFreeMemory(device, handle, nullptr);
|
dld.vkFreeMemory(device, handle, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Destroys a VkEvent owned by `device` through the dispatch table,
// passing a null allocator.
void Destroy(VkDevice device, VkEvent handle, const DeviceDispatch& dld) noexcept {
|
||||||
|
dld.vkDestroyEvent(device, handle, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
|
void Destroy(VkDevice device, VkFence handle, const DeviceDispatch& dld) noexcept {
|
||||||
dld.vkDestroyFence(device, handle, nullptr);
|
dld.vkDestroyFence(device, handle, nullptr);
|
||||||
}
|
}
|
||||||
|
@ -599,6 +607,16 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
|
||||||
return ShaderModule(object, handle, *dld);
|
return ShaderModule(object, handle, *dld);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Creates a Vulkan event with default flags and wraps it in an owning Event handle.
// The result of vkCreateEvent is passed through Check().
Event Device::CreateEvent() const {
    // Fields in declaration order: sType, pNext, flags.
    const VkEventCreateInfo create_info{VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, nullptr, 0};

    VkEvent created;
    Check(dld->vkCreateEvent(handle, &create_info, nullptr, &created));
    return Event(created, handle, *dld);
}
|
||||||
|
|
||||||
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
|
SwapchainKHR Device::CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const {
|
||||||
VkSwapchainKHR object;
|
VkSwapchainKHR object;
|
||||||
Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
|
Check(dld->vkCreateSwapchainKHR(handle, &ci, nullptr, &object));
|
||||||
|
|
|
@ -199,6 +199,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||||
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
|
PFN_vkCmdSetBlendConstants vkCmdSetBlendConstants;
|
||||||
PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
|
PFN_vkCmdSetDepthBias vkCmdSetDepthBias;
|
||||||
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
|
PFN_vkCmdSetDepthBounds vkCmdSetDepthBounds;
|
||||||
|
PFN_vkCmdSetEvent vkCmdSetEvent;
|
||||||
PFN_vkCmdSetScissor vkCmdSetScissor;
|
PFN_vkCmdSetScissor vkCmdSetScissor;
|
||||||
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
|
PFN_vkCmdSetStencilCompareMask vkCmdSetStencilCompareMask;
|
||||||
PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
|
PFN_vkCmdSetStencilReference vkCmdSetStencilReference;
|
||||||
|
@ -211,6 +212,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||||
PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
|
PFN_vkCreateDescriptorPool vkCreateDescriptorPool;
|
||||||
PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
|
PFN_vkCreateDescriptorSetLayout vkCreateDescriptorSetLayout;
|
||||||
PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
|
PFN_vkCreateDescriptorUpdateTemplateKHR vkCreateDescriptorUpdateTemplateKHR;
|
||||||
|
PFN_vkCreateEvent vkCreateEvent;
|
||||||
PFN_vkCreateFence vkCreateFence;
|
PFN_vkCreateFence vkCreateFence;
|
||||||
PFN_vkCreateFramebuffer vkCreateFramebuffer;
|
PFN_vkCreateFramebuffer vkCreateFramebuffer;
|
||||||
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
|
PFN_vkCreateGraphicsPipelines vkCreateGraphicsPipelines;
|
||||||
|
@ -229,6 +231,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||||
PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
|
PFN_vkDestroyDescriptorPool vkDestroyDescriptorPool;
|
||||||
PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
|
PFN_vkDestroyDescriptorSetLayout vkDestroyDescriptorSetLayout;
|
||||||
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
|
PFN_vkDestroyDescriptorUpdateTemplateKHR vkDestroyDescriptorUpdateTemplateKHR;
|
||||||
|
PFN_vkDestroyEvent vkDestroyEvent;
|
||||||
PFN_vkDestroyFence vkDestroyFence;
|
PFN_vkDestroyFence vkDestroyFence;
|
||||||
PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
|
PFN_vkDestroyFramebuffer vkDestroyFramebuffer;
|
||||||
PFN_vkDestroyImage vkDestroyImage;
|
PFN_vkDestroyImage vkDestroyImage;
|
||||||
|
@ -248,6 +251,7 @@ struct DeviceDispatch : public InstanceDispatch {
|
||||||
PFN_vkFreeMemory vkFreeMemory;
|
PFN_vkFreeMemory vkFreeMemory;
|
||||||
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
|
PFN_vkGetBufferMemoryRequirements vkGetBufferMemoryRequirements;
|
||||||
PFN_vkGetDeviceQueue vkGetDeviceQueue;
|
PFN_vkGetDeviceQueue vkGetDeviceQueue;
|
||||||
|
PFN_vkGetEventStatus vkGetEventStatus;
|
||||||
PFN_vkGetFenceStatus vkGetFenceStatus;
|
PFN_vkGetFenceStatus vkGetFenceStatus;
|
||||||
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
|
PFN_vkGetImageMemoryRequirements vkGetImageMemoryRequirements;
|
||||||
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
|
PFN_vkGetQueryPoolResults vkGetQueryPoolResults;
|
||||||
|
@ -279,6 +283,7 @@ void Destroy(VkDevice, VkDescriptorPool, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkDescriptorSetLayout, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkDescriptorUpdateTemplateKHR, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkDeviceMemory, const DeviceDispatch&) noexcept;
|
||||||
|
void Destroy(VkDevice, VkEvent, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkFence, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkFramebuffer, const DeviceDispatch&) noexcept;
|
||||||
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
|
void Destroy(VkDevice, VkImage, const DeviceDispatch&) noexcept;
|
||||||
|
@ -648,6 +653,15 @@ public:
|
||||||
std::vector<VkImage> GetImages() const;
|
std::vector<VkImage> GetImages() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Handle wrapper around a VkEvent owned by a VkDevice.
class Event : public Handle<VkEvent, VkDevice, DeviceDispatch> {
|
||||||
|
using Handle<VkEvent, VkDevice, DeviceDispatch>::Handle;
|
||||||
|
|
||||||
|
public:
|
||||||
|
// Returns the raw result of vkGetEventStatus for this event.
VkResult GetStatus() const noexcept {
|
||||||
|
return dld->vkGetEventStatus(owner, handle);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
|
class Device : public Handle<VkDevice, NoOwner, DeviceDispatch> {
|
||||||
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
|
using Handle<VkDevice, NoOwner, DeviceDispatch>::Handle;
|
||||||
|
|
||||||
|
@ -695,6 +709,8 @@ public:
|
||||||
|
|
||||||
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
|
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
|
||||||
|
|
||||||
|
Event CreateEvent() const;
|
||||||
|
|
||||||
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
|
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;
|
||||||
|
|
||||||
DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
|
DeviceMemory TryAllocateMemory(const VkMemoryAllocateInfo& ai) const noexcept;
|
||||||
|
@ -938,6 +954,10 @@ public:
|
||||||
dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
|
dld->vkCmdSetDepthBounds(handle, min_depth_bounds, max_depth_bounds);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forwards to vkCmdSetEvent using this object's handle, the given event and stage mask.
void SetEvent(VkEvent event, VkPipelineStageFlags stage_flags) const noexcept {
|
||||||
|
dld->vkCmdSetEvent(handle, event, stage_flags);
|
||||||
|
}
|
||||||
|
|
||||||
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
|
void BindTransformFeedbackBuffersEXT(u32 first, u32 count, const VkBuffer* buffers,
|
||||||
const VkDeviceSize* offsets,
|
const VkDeviceSize* offsets,
|
||||||
const VkDeviceSize* sizes) const noexcept {
|
const VkDeviceSize* sizes) const noexcept {
|
||||||
|
|
|
@ -192,6 +192,22 @@ public:
|
||||||
index = index_;
|
index = index_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Stores the "memory marked" flag for this surface.
void SetMemoryMarked(bool is_memory_marked_) {
|
||||||
|
is_memory_marked = is_memory_marked_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the current "memory marked" flag.
bool IsMemoryMarked() const {
|
||||||
|
return is_memory_marked;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stores the "sync pending" flag for this surface.
void SetSyncPending(bool is_sync_pending_) {
|
||||||
|
is_sync_pending = is_sync_pending_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the current "sync pending" flag.
bool IsSyncPending() const {
|
||||||
|
return is_sync_pending;
|
||||||
|
}
|
||||||
|
|
||||||
void MarkAsPicked(bool is_picked_) {
|
void MarkAsPicked(bool is_picked_) {
|
||||||
is_picked = is_picked_;
|
is_picked = is_picked_;
|
||||||
}
|
}
|
||||||
|
@ -303,6 +319,8 @@ private:
|
||||||
bool is_target{};
|
bool is_target{};
|
||||||
bool is_registered{};
|
bool is_registered{};
|
||||||
bool is_picked{};
|
bool is_picked{};
|
||||||
|
bool is_memory_marked{};
|
||||||
|
bool is_sync_pending{};
|
||||||
u32 index{NO_RT};
|
u32 index{NO_RT};
|
||||||
u64 modification_tick{};
|
u64 modification_tick{};
|
||||||
};
|
};
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <array>
|
#include <array>
|
||||||
|
#include <list>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <mutex>
|
#include <mutex>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
@ -62,6 +63,30 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Handles a CPU write over [addr, addr + size): every overlapping surface whose memory
// is still marked gets unmarked, flagged as sync-pending and queued for unregistration.
void OnCPUWrite(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const auto& overlap : GetSurfacesInRegion(addr, size)) {
        if (!overlap->IsMemoryMarked()) {
            continue;
        }
        UnmarkMemory(overlap);
        overlap->SetSyncPending(true);
        marked_for_unregister.emplace_back(overlap);
    }
}
|
||||||
|
|
||||||
|
void SyncGuestHost() {
|
||||||
|
std::lock_guard lock{mutex};
|
||||||
|
|
||||||
|
for (const auto& surface : marked_for_unregister) {
|
||||||
|
if (surface->IsRegistered()) {
|
||||||
|
surface->SetSyncPending(false);
|
||||||
|
Unregister(surface);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
marked_for_unregister.clear();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Guarantees that rendertargets don't unregister themselves if the
|
* Guarantees that rendertargets don't unregister themselves if the
|
||||||
* collide. Protection is currently only done on 3D slices.
|
* collide. Protection is currently only done on 3D slices.
|
||||||
|
@ -85,10 +110,20 @@ public:
|
||||||
return a->GetModificationTick() < b->GetModificationTick();
|
return a->GetModificationTick() < b->GetModificationTick();
|
||||||
});
|
});
|
||||||
for (const auto& surface : surfaces) {
|
for (const auto& surface : surfaces) {
|
||||||
|
mutex.unlock();
|
||||||
FlushSurface(surface);
|
FlushSurface(surface);
|
||||||
|
mutex.lock();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns true when at least one surface overlapping the region reports IsModified().
bool MustFlushRegion(VAddr addr, std::size_t size) {
    std::lock_guard lock{mutex};

    for (const TSurface& candidate : GetSurfacesInRegion(addr, size)) {
        if (candidate->IsModified()) {
            return true;
        }
    }
    return false;
}
|
||||||
|
|
||||||
TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
|
TView GetTextureSurface(const Tegra::Texture::TICEntry& tic,
|
||||||
const VideoCommon::Shader::Sampler& entry) {
|
const VideoCommon::Shader::Sampler& entry) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
|
@ -206,8 +241,14 @@ public:
|
||||||
|
|
||||||
auto surface_view = GetSurface(gpu_addr, *cpu_addr,
|
auto surface_view = GetSurface(gpu_addr, *cpu_addr,
|
||||||
SurfaceParams::CreateForFramebuffer(system, index), true);
|
SurfaceParams::CreateForFramebuffer(system, index), true);
|
||||||
if (render_targets[index].target)
|
if (render_targets[index].target) {
|
||||||
render_targets[index].target->MarkAsRenderTarget(false, NO_RT);
|
auto& surface = render_targets[index].target;
|
||||||
|
surface->MarkAsRenderTarget(false, NO_RT);
|
||||||
|
const auto& cr_params = surface->GetSurfaceParams();
|
||||||
|
if (!cr_params.is_tiled && Settings::values.use_asynchronous_gpu_emulation) {
|
||||||
|
AsyncFlushSurface(surface);
|
||||||
|
}
|
||||||
|
}
|
||||||
render_targets[index].target = surface_view.first;
|
render_targets[index].target = surface_view.first;
|
||||||
render_targets[index].view = surface_view.second;
|
render_targets[index].view = surface_view.second;
|
||||||
if (render_targets[index].target)
|
if (render_targets[index].target)
|
||||||
|
@ -284,6 +325,34 @@ public:
|
||||||
return ++ticks;
|
return ++ticks;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Appends the current uncommitted flush list (which may be null) to the committed
// queue, then resets the uncommitted list.
void CommitAsyncFlushes() {
|
||||||
|
committed_flushes.push_back(uncommitted_flushes);
|
||||||
|
uncommitted_flushes.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when an uncommitted flush list currently exists (is non-null).
bool HasUncommittedFlushes() const {
|
||||||
|
return uncommitted_flushes != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool ShouldWaitAsyncFlushes() const {
|
||||||
|
return !committed_flushes.empty() && committed_flushes.front() != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PopAsyncFlushes() {
|
||||||
|
if (committed_flushes.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
auto& flush_list = committed_flushes.front();
|
||||||
|
if (!flush_list) {
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (TSurface& surface : *flush_list) {
|
||||||
|
FlushSurface(surface);
|
||||||
|
}
|
||||||
|
committed_flushes.pop_front();
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
bool is_astc_supported)
|
bool is_astc_supported)
|
||||||
|
@ -345,9 +414,20 @@ protected:
|
||||||
surface->SetCpuAddr(*cpu_addr);
|
surface->SetCpuAddr(*cpu_addr);
|
||||||
RegisterInnerCache(surface);
|
RegisterInnerCache(surface);
|
||||||
surface->MarkAsRegistered(true);
|
surface->MarkAsRegistered(true);
|
||||||
|
surface->SetMemoryMarked(true);
|
||||||
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
|
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Decrements the cached-page count over the surface's CPU range and clears its
// "memory marked" flag. Surfaces that are not marked are left untouched.
void UnmarkMemory(TSurface surface) {
    if (surface->IsMemoryMarked()) {
        const std::size_t byte_count = surface->GetSizeInBytes();
        const VAddr base_addr = surface->GetCpuAddr();
        rasterizer.UpdatePagesCachedCount(base_addr, byte_count, -1);
        surface->SetMemoryMarked(false);
    }
}
|
||||||
|
|
||||||
void Unregister(TSurface surface) {
|
void Unregister(TSurface surface) {
|
||||||
if (guard_render_targets && surface->IsProtected()) {
|
if (guard_render_targets && surface->IsProtected()) {
|
||||||
return;
|
return;
|
||||||
|
@ -355,9 +435,11 @@ protected:
|
||||||
if (!guard_render_targets && surface->IsRenderTarget()) {
|
if (!guard_render_targets && surface->IsRenderTarget()) {
|
||||||
ManageRenderTargetUnregister(surface);
|
ManageRenderTargetUnregister(surface);
|
||||||
}
|
}
|
||||||
const std::size_t size = surface->GetSizeInBytes();
|
UnmarkMemory(surface);
|
||||||
const VAddr cpu_addr = surface->GetCpuAddr();
|
if (surface->IsSyncPending()) {
|
||||||
rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
|
marked_for_unregister.remove(surface);
|
||||||
|
surface->SetSyncPending(false);
|
||||||
|
}
|
||||||
UnregisterInnerCache(surface);
|
UnregisterInnerCache(surface);
|
||||||
surface->MarkAsRegistered(false);
|
surface->MarkAsRegistered(false);
|
||||||
ReserveSurface(surface->GetSurfaceParams(), surface);
|
ReserveSurface(surface->GetSurfaceParams(), surface);
|
||||||
|
@ -417,7 +499,7 @@ private:
|
||||||
**/
|
**/
|
||||||
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
|
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
|
||||||
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
|
const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
return RecycleStrategy::Flush;
|
return RecycleStrategy::Flush;
|
||||||
}
|
}
|
||||||
// 3D Textures decision
|
// 3D Textures decision
|
||||||
|
@ -461,7 +543,7 @@ private:
|
||||||
}
|
}
|
||||||
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
|
switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
|
||||||
case RecycleStrategy::Ignore: {
|
case RecycleStrategy::Ignore: {
|
||||||
return InitializeSurface(gpu_addr, params, Settings::values.use_accurate_gpu_emulation);
|
return InitializeSurface(gpu_addr, params, Settings::IsGPULevelExtreme());
|
||||||
}
|
}
|
||||||
case RecycleStrategy::Flush: {
|
case RecycleStrategy::Flush: {
|
||||||
std::sort(overlaps.begin(), overlaps.end(),
|
std::sort(overlaps.begin(), overlaps.end(),
|
||||||
|
@ -509,7 +591,7 @@ private:
|
||||||
}
|
}
|
||||||
const auto& final_params = new_surface->GetSurfaceParams();
|
const auto& final_params = new_surface->GetSurfaceParams();
|
||||||
if (cr_params.type != final_params.type) {
|
if (cr_params.type != final_params.type) {
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
BufferCopy(current_surface, new_surface);
|
BufferCopy(current_surface, new_surface);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -598,7 +680,7 @@ private:
|
||||||
if (passed_tests == 0) {
|
if (passed_tests == 0) {
|
||||||
return {};
|
return {};
|
||||||
// In Accurate GPU all tests should pass, else we recycle
|
// In Accurate GPU all tests should pass, else we recycle
|
||||||
} else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
|
} else if (Settings::IsGPULevelExtreme() && passed_tests != overlaps.size()) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
for (const auto& surface : overlaps) {
|
for (const auto& surface : overlaps) {
|
||||||
|
@ -668,7 +750,7 @@ private:
|
||||||
for (const auto& surface : overlaps) {
|
for (const auto& surface : overlaps) {
|
||||||
if (!surface->MatchTarget(params.target)) {
|
if (!surface->MatchTarget(params.target)) {
|
||||||
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
|
if (overlaps.size() == 1 && surface->GetCpuAddr() == cpu_addr) {
|
||||||
if (Settings::values.use_accurate_gpu_emulation) {
|
if (Settings::IsGPULevelExtreme()) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
Unregister(surface);
|
Unregister(surface);
|
||||||
|
@ -1106,6 +1188,13 @@ private:
|
||||||
TView view;
|
TView view;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Queues a surface in the uncommitted flush list.
/// The list is allocated on first use, so uncommitted_flushes stays null
/// until at least one surface has been queued.
/// @param surface Surface to append to the uncommitted list.
void AsyncFlushSurface(TSurface& surface) {
    if (uncommitted_flushes == nullptr) {
        uncommitted_flushes = std::make_shared<std::list<TSurface>>();
    }
    uncommitted_flushes->emplace_back(surface);
}
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
||||||
FormatLookupTable format_lookup_table;
|
FormatLookupTable format_lookup_table;
|
||||||
|
@ -1150,6 +1239,11 @@ private:
|
||||||
std::unordered_map<u32, TSurface> invalid_cache;
|
std::unordered_map<u32, TSurface> invalid_cache;
|
||||||
std::vector<u8> invalid_memory;
|
std::vector<u8> invalid_memory;
|
||||||
|
|
||||||
|
// Surfaces tracked while a sync is pending on them; Unregister removes a
// surface from this list when it finds the surface's sync-pending flag set.
// NOTE(review): the code that adds entries is not visible in this chunk.
std::list<TSurface> marked_for_unregister;
|
||||||
|
|
||||||
|
// Surfaces queued by AsyncFlushSurface. Allocated lazily, so a null pointer
// means nothing has been queued.
std::shared_ptr<std::list<TSurface>> uncommitted_flushes{};
|
||||||
|
// Flush batches consumed front-first by PopAsyncFlushes. An entry may be
// null, in which case it is popped without flushing anything.
std::list<std::shared_ptr<std::list<TSurface>>> committed_flushes;
|
||||||
|
|
||||||
StagingCache staging_cache;
|
StagingCache staging_cache;
|
||||||
std::recursive_mutex mutex;
|
std::recursive_mutex mutex;
|
||||||
};
|
};
|
||||||
|
|
|
@ -639,8 +639,8 @@ void Config::ReadRendererValues() {
|
||||||
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
|
Settings::values.frame_limit = ReadSetting(QStringLiteral("frame_limit"), 100).toInt();
|
||||||
Settings::values.use_disk_shader_cache =
|
Settings::values.use_disk_shader_cache =
|
||||||
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
|
ReadSetting(QStringLiteral("use_disk_shader_cache"), true).toBool();
|
||||||
Settings::values.use_accurate_gpu_emulation =
|
const int gpu_accuracy_level = ReadSetting(QStringLiteral("gpu_accuracy"), 0).toInt();
|
||||||
ReadSetting(QStringLiteral("use_accurate_gpu_emulation"), false).toBool();
|
Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
|
||||||
Settings::values.use_asynchronous_gpu_emulation =
|
Settings::values.use_asynchronous_gpu_emulation =
|
||||||
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
|
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
|
||||||
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
|
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
|
||||||
|
@ -1080,8 +1080,8 @@ void Config::SaveRendererValues() {
|
||||||
WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
|
WriteSetting(QStringLiteral("frame_limit"), Settings::values.frame_limit, 100);
|
||||||
WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
|
WriteSetting(QStringLiteral("use_disk_shader_cache"), Settings::values.use_disk_shader_cache,
|
||||||
true);
|
true);
|
||||||
WriteSetting(QStringLiteral("use_accurate_gpu_emulation"),
|
WriteSetting(QStringLiteral("gpu_accuracy"), static_cast<int>(Settings::values.gpu_accuracy),
|
||||||
Settings::values.use_accurate_gpu_emulation, false);
|
0);
|
||||||
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
|
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
|
||||||
Settings::values.use_asynchronous_gpu_emulation, false);
|
Settings::values.use_asynchronous_gpu_emulation, false);
|
||||||
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
||||||
|
|
|
@ -19,7 +19,7 @@ ConfigureGraphicsAdvanced::~ConfigureGraphicsAdvanced() = default;
|
||||||
|
|
||||||
void ConfigureGraphicsAdvanced::SetConfiguration() {
|
void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
|
const bool runtime_lock = !Core::System::GetInstance().IsPoweredOn();
|
||||||
ui->use_accurate_gpu_emulation->setChecked(Settings::values.use_accurate_gpu_emulation);
|
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
|
||||||
ui->use_vsync->setEnabled(runtime_lock);
|
ui->use_vsync->setEnabled(runtime_lock);
|
||||||
ui->use_vsync->setChecked(Settings::values.use_vsync);
|
ui->use_vsync->setChecked(Settings::values.use_vsync);
|
||||||
ui->force_30fps_mode->setEnabled(runtime_lock);
|
ui->force_30fps_mode->setEnabled(runtime_lock);
|
||||||
|
@ -29,7 +29,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||||
Settings::values.use_accurate_gpu_emulation = ui->use_accurate_gpu_emulation->isChecked();
|
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
|
||||||
|
Settings::values.gpu_accuracy = gpu_accuracy;
|
||||||
Settings::values.use_vsync = ui->use_vsync->isChecked();
|
Settings::values.use_vsync = ui->use_vsync->isChecked();
|
||||||
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
|
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
|
||||||
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
|
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
|
||||||
|
|
|
@ -23,11 +23,34 @@
|
||||||
</property>
|
</property>
|
||||||
<layout class="QVBoxLayout" name="verticalLayout_3">
|
<layout class="QVBoxLayout" name="verticalLayout_3">
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="use_accurate_gpu_emulation">
|
<layout class="QHBoxLayout" name="horizontalLayout_2">
|
||||||
<property name="text">
|
<item>
|
||||||
<string>Use accurate GPU emulation (slow)</string>
|
<widget class="QLabel" name="label_gpu_accuracy">
|
||||||
</property>
|
<property name="text">
|
||||||
</widget>
|
<string>Accuracy Level:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QComboBox" name="gpu_accuracy">
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">Normal</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">High</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string notr="true">Extreme(very slow)</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="use_vsync">
|
<widget class="QCheckBox" name="use_vsync">
|
||||||
|
|
|
@ -388,8 +388,8 @@ void Config::ReadValues() {
|
||||||
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
|
static_cast<u16>(sdl2_config->GetInteger("Renderer", "frame_limit", 100));
|
||||||
Settings::values.use_disk_shader_cache =
|
Settings::values.use_disk_shader_cache =
|
||||||
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
|
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
|
||||||
Settings::values.use_accurate_gpu_emulation =
|
const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
|
||||||
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
|
Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
|
||||||
Settings::values.use_asynchronous_gpu_emulation =
|
Settings::values.use_asynchronous_gpu_emulation =
|
||||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
||||||
Settings::values.use_vsync =
|
Settings::values.use_vsync =
|
||||||
|
|
|
@ -146,9 +146,9 @@ frame_limit =
|
||||||
# 0 (default): Off, 1 : On
|
# 0 (default): Off, 1 : On
|
||||||
use_disk_shader_cache =
|
use_disk_shader_cache =
|
||||||
|
|
||||||
# Whether to use accurate GPU emulation
|
# Which gpu accuracy level to use
|
||||||
# 0 (default): Off (fast), 1 : On (slow)
|
# 0 (Normal), 1 (High), 2 (Extreme)
|
||||||
use_accurate_gpu_emulation =
|
gpu_accuracy =
|
||||||
|
|
||||||
# Whether to use asynchronous GPU emulation
|
# Whether to use asynchronous GPU emulation
|
||||||
# 0 : Off (slow), 1 (default): On (fast)
|
# 0 : Off (slow), 1 (default): On (fast)
|
||||||
|
|
|
@ -126,8 +126,8 @@ void Config::ReadValues() {
|
||||||
Settings::values.frame_limit = 100;
|
Settings::values.frame_limit = 100;
|
||||||
Settings::values.use_disk_shader_cache =
|
Settings::values.use_disk_shader_cache =
|
||||||
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
|
sdl2_config->GetBoolean("Renderer", "use_disk_shader_cache", false);
|
||||||
Settings::values.use_accurate_gpu_emulation =
|
const int gpu_accuracy_level = sdl2_config->GetInteger("Renderer", "gpu_accuracy", 0);
|
||||||
sdl2_config->GetBoolean("Renderer", "use_accurate_gpu_emulation", false);
|
Settings::values.gpu_accuracy = static_cast<Settings::GPUAccuracy>(gpu_accuracy_level);
|
||||||
Settings::values.use_asynchronous_gpu_emulation =
|
Settings::values.use_asynchronous_gpu_emulation =
|
||||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
||||||
|
|
||||||
|
|
Reference in New Issue