From 31a76410e8fa09462d960c10148c075125dc385a Mon Sep 17 00:00:00 2001
From: ameerj
Date: Sun, 2 Aug 2020 13:05:41 -0400
Subject: [PATCH] Address feedback, add shader compile notifier, update setting
 text

---
 src/video_core/renderer_vulkan/vk_device.h   |   5 +-
 .../renderer_vulkan/vk_graphics_pipeline.cpp |   9 +-
 .../renderer_vulkan/vk_graphics_pipeline.h   |  30 ++++-
 .../renderer_vulkan/vk_pipeline_cache.cpp    |  23 ++--
 .../renderer_vulkan/vk_pipeline_cache.h      |  58 ----------
 .../renderer_vulkan/vk_rasterizer.cpp        |  19 ++--
 src/video_core/shader/async_shaders.cpp      | 106 +++++++++---------
 src/video_core/shader/async_shaders.h        |  23 ++--
 .../configure_graphics_advanced.ui           |   2 +-
 9 files changed, 115 insertions(+), 160 deletions(-)

diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index 30cd3e189..26a233db1 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -202,6 +202,7 @@ public:
         return reported_extensions;
     }
 
+    /// Returns true if the setting for async shader compilation is enabled.
     bool UseAsynchronousShaders() const {
         return use_asynchronous_shaders;
     }
@@ -255,7 +256,9 @@ private:
     bool ext_custom_border_color{};      ///< Support for VK_EXT_custom_border_color.
     bool ext_extended_dynamic_state{};   ///< Support for VK_EXT_extended_dynamic_state.
     bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
-    bool use_asynchronous_shaders{};
+
+    // Asynchronous Graphics Pipeline setting
+    bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
 
     // Telemetry parameters
     std::string vendor_name; ///< Device's driver name.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 7d51b9836..5dc4cd5af 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -78,14 +78,15 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
                                        const GraphicsPipelineCacheKey& key,
                                        vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                        const SPIRVProgram& program)
-    : device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()},
+    : device{device}, scheduler{scheduler}, hash{key.Hash()}, cache_key{key},
       descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
       descriptor_allocator{descriptor_pool, *descriptor_set_layout},
       update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
       descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
                                                                         program)},
-      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)},
-      pipeline{CreatePipeline(key.renderpass_params, program)}, m_key{key} {}
+      renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline(
+                                                                             key.renderpass_params,
+                                                                             program)} {}
 
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 
@@ -180,7 +181,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                 const SPIRVProgram& program) const {
-    const auto& state = fixed_state;
+    const auto& state = cache_key.fixed_state;
     const auto& viewport_swizzles = state.viewport_swizzles;
 
     FixedPipelineState::DynamicState dynamic;
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
index d50bd347c..9d462db0a 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@@ -19,7 +19,27 @@ namespace Vulkan {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey;
+struct GraphicsPipelineCacheKey {
+    RenderPassParams renderpass_params;
+    u32 padding;
+    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
+    FixedPipelineState fixed_state;
+
+    std::size_t Hash() const noexcept;
+
+    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
+
+    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
+        return !operator==(rhs);
+    }
+
+    std::size_t Size() const noexcept {
+        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+    }
+};
+static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
+static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 
 class VKDescriptorPool;
 class VKDevice;
@@ -54,8 +74,8 @@ public:
         return renderpass;
     }
 
-    const GraphicsPipelineCacheKey& GetCacheKey() const {
-        return m_key;
+    GraphicsPipelineCacheKey GetCacheKey() const {
+        return cache_key;
     }
 
 private:
@@ -74,8 +94,8 @@
 
     const VKDevice& device;
     VKScheduler& scheduler;
-    const FixedPipelineState fixed_state;
     const u64 hash;
+    GraphicsPipelineCacheKey cache_key;
 
     vk::DescriptorSetLayout descriptor_set_layout;
     DescriptorAllocator descriptor_allocator;
@@ -86,8 +106,6 @@
 
     VkRenderPass renderpass;
     vk::Pipeline pipeline;
-
-    const GraphicsPipelineCacheKey& m_key;
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 1a8b2c62b..20ffbeb38 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -28,6 +28,7 @@
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
+#include "video_core/shader_notify.h"
 
 namespace Vulkan {
 
@@ -214,27 +215,31 @@ VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
     }
     last_graphics_key = key;
 
-    if (device.UseAsynchronousShaders()) {
+    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU())) {
         std::unique_lock lock{pipeline_cache};
         const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
         if (is_cache_miss) {
+            system.GPU().ShaderNotify().MarkSharderBuilding();
             LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
             const auto [program, bindings] = DecompileShaders(key.fixed_state);
-            async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
-                                            key.padding, key.shaders, key.fixed_state);
+            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
+                                            update_descriptor_queue, renderpass_cache, bindings,
+                                            program, key);
         }
-        last_graphics_pipeline = graphics_cache.at(key).get();
+        last_graphics_pipeline = pair->second.get();
         return last_graphics_pipeline;
     }
 
     const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
     auto& entry = pair->second;
     if (is_cache_miss) {
+        system.GPU().ShaderNotify().MarkSharderBuilding();
         LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
         const auto [program, bindings] = DecompileShaders(key.fixed_state);
         entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                      update_descriptor_queue, renderpass_cache,
                                                      key, bindings, program);
+        system.GPU().ShaderNotify().MarkShaderComplete();
     }
     last_graphics_pipeline = entry.get();
     return last_graphics_pipeline;
@@ -294,14 +299,8 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
 
 void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
     std::unique_lock lock{pipeline_cache};
-    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(pipeline->GetCacheKey());
-    auto& entry = pair->second;
-    if (entry) {
-        LOG_INFO(Render_Vulkan, "Pipeline already here 0x{:016X}", pipeline->GetCacheKey().Hash());
-        duplicates.push_back(std::move(pipeline));
-    } else {
-        entry = std::move(pipeline);
-    }
+    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
+    system.GPU().ShaderNotify().MarkShaderComplete();
 }
 
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 777ef2038..c04829e77 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -44,28 +44,6 @@ class VKUpdateDescriptorQueue;
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-struct GraphicsPipelineCacheKey {
-    RenderPassParams renderpass_params;
-    u32 padding;
-    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
-    FixedPipelineState fixed_state;
-
-    std::size_t Hash() const noexcept;
-
-    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
-
-    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
-        return !operator==(rhs);
-    }
-
-    std::size_t Size() const noexcept {
-        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
-    }
-};
-static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
-static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
-
 struct ComputePipelineCacheKey {
     GPUVAddr shader;
     u32 shared_memory_size;
@@ -158,41 +136,6 @@ public:
 
     VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
 
-    const VKDevice& GetDevice() const {
-        return device;
-    }
-
-    VKScheduler& GetScheduler() {
-        return scheduler;
-    }
-    const VKScheduler& GetScheduler() const {
-        return scheduler;
-    }
-
-    VKDescriptorPool& GetDescriptorPool() {
-        return descriptor_pool;
-    }
-
-    const VKDescriptorPool& GetDescriptorPool() const {
-        return descriptor_pool;
-    }
-
-    VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
-        return update_descriptor_queue;
-    }
-
-    const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
-        return update_descriptor_queue;
-    }
-
-    VKRenderPassCache& GetRenderpassCache() {
-        return renderpass_cache;
-    }
-
-    const VKRenderPassCache& GetRenderpassCache() const {
-        return renderpass_cache;
-    }
-
     void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
 
 protected:
@@ -216,7 +159,6 @@ private:
     GraphicsPipelineCacheKey last_graphics_key;
     VKGraphicsPipeline* last_graphics_pipeline = nullptr;
-    std::vector<std::unique_ptr<VKGraphicsPipeline>> duplicates;
 
     std::mutex pipeline_cache;
 
     std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fc1b51a96..720802ad5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
@@ -408,15 +409,10 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 
     // Max worker threads we should allow
     constexpr u32 MAX_THREADS = 4;
-    // Amount of threads we should reserve for other parts of yuzu
-    constexpr u32 RESERVED_THREADS = 6;
-    // Get the amount of threads we can use(this can return zero)
-    const auto cpu_thread_count =
-        std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
-    // Deduce how many "extra" threads we have to use.
-    const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+    // Deduce how many threads we can use
+    const auto threads_used = std::thread::hardware_concurrency() / 4;
     // Always allow at least 1 thread regardless of our settings
-    const auto max_worker_count = std::max(1u, max_threads_unused);
+    const auto max_worker_count = std::max(1U, threads_used);
     // Don't use more than MAX_THREADS
     const auto worker_count = std::min(max_worker_count, MAX_THREADS);
     async_shaders.AllocateWorkers(worker_count);
@@ -432,6 +428,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
+    SCOPE_EXIT({ system.GPU().TickWork(); });
+
     const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@@ -458,10 +456,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
-        system.GPU().TickWork();
         return;
     }
@@ -488,8 +485,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
index 54a81460b..ea813d506 100644
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@@ -2,7 +2,6 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <chrono>
 #include <condition_variable>
 #include <mutex>
 #include <thread>
@@ -111,38 +110,44 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                      VideoCommon::Shader::CompilerSettings compiler_settings,
                                      const VideoCommon::Shader::Registry& registry,
                                      VAddr cpu_addr) {
-    auto params = std::make_unique<WorkerParams>();
-    params->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
-    params->device = &device;
-    params->shader_type = shader_type;
-    params->uid = uid;
-    params->code = std::move(code);
-    params->code_b = std::move(code_b);
-    params->main_offset = main_offset;
-    params->compiler_settings = compiler_settings;
-    params->registry = &registry;
-    params->cpu_address = cpu_addr;
+    WorkerParams params{
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device,
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = &registry,
+        .cpu_address = cpu_addr,
+    };
 
     std::unique_lock lock(queue_mutex);
     pending_queue.push(std::move(params));
     cv.notify_one();
 }
 
-void AsyncShaders::QueueVulkanShader(
-    Vulkan::VKPipelineCache* pp_cache, std::vector<VkDescriptorSetLayoutBinding> bindings,
-    Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params, u32 padding,
-    std::array<GPUVAddr, Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> shaders,
-    Vulkan::FixedPipelineState fixed_state) {
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
-    auto params = std::make_unique<WorkerParams>();
-
-    params->backend = Backend::Vulkan;
-    params->pp_cache = pp_cache;
-    params->bindings = bindings;
-    params->program = program;
-    params->renderpass_params = renderpass_params;
-    params->padding = padding;
-    params->shaders = shaders;
-    params->fixed_state = fixed_state;
+    WorkerParams params{
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = bindings,
+        .program = program,
+        .key = key,
+    };
 
     std::unique_lock lock(queue_mutex);
     pending_queue.push(std::move(params));
@@ -150,7 +155,6 @@ void AsyncShaders::QueueVulkanShader(
 }
 
 void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
-    using namespace std::chrono_literals;
     while (!is_thread_exiting.load(std::memory_order_relaxed)) {
         std::unique_lock lock{queue_mutex};
         cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@@ -168,53 +172,43 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
         }
 
         // Pull work from queue
-        auto work = std::move(pending_queue.front());
+        WorkerParams work = std::move(pending_queue.front());
         pending_queue.pop();
         lock.unlock();
 
-        if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
-            VideoCommon::Shader::Registry registry = *work->registry;
-            const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            VideoCommon::Shader::Registry registry = *work.registry;
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
             const auto scope = context->Acquire();
             auto program =
-                OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry);
             Result result{};
-            result.backend = work->backend;
-            result.cpu_address = work->cpu_address;
-            result.uid = work->uid;
-            result.code = std::move(work->code);
-            result.code_b = std::move(work->code_b);
-            result.shader_type = work->shader_type;
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
 
-            if (work->backend == Backend::OpenGL) {
+            if (work.backend == Backend::OpenGL) {
                 result.program.opengl = std::move(program->source_program);
-            } else if (work->backend == Backend::GLASM) {
+            } else if (work.backend == Backend::GLASM) {
                 result.program.glasm = std::move(program->assembly_program);
             }
 
-            work.reset();
             {
                 std::unique_lock complete_lock(completed_mutex);
                 finished_work.push_back(std::move(result));
             }
-        } else if (work->backend == Backend::Vulkan) {
-            Vulkan::GraphicsPipelineCacheKey params_key{
-                .renderpass_params = work->renderpass_params,
-                .padding = work->padding,
-                .shaders = work->shaders,
-                .fixed_state = work->fixed_state,
-            };
+        } else if (work.backend == Backend::Vulkan) {
             auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
-                work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
-                work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
-                work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
 
-            work->pp_cache->EmplacePipeline(std::move(pipeline));
-            work.reset();
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
         }
-        // Give a chance for another thread to get work.
-        std::this_thread::yield();
     }
 }
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
index d4eeb8fb6..7c10bd63f 100644
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@@ -86,12 +86,13 @@ public:
                            VideoCommon::Shader::CompilerSettings compiler_settings,
                            const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
 
-    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler,
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
                            std::vector<VkDescriptorSetLayoutBinding> bindings,
-                           Vulkan::SPIRVProgram program, Vulkan::RenderPassParams renderpass_params,
-                           u32 padding,
-                           std::array<GPUVAddr, Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> shaders,
-                           Vulkan::FixedPipelineState fixed_state);
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
 
 private:
     void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@@ -114,12 +115,14 @@ private:
 
         // For Vulkan
        Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
         std::vector<VkDescriptorSetLayoutBinding> bindings;
         Vulkan::SPIRVProgram program;
-        Vulkan::RenderPassParams renderpass_params;
-        u32 padding;
-        std::array<GPUVAddr, Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram> shaders;
-        Vulkan::FixedPipelineState fixed_state;
+        Vulkan::GraphicsPipelineCacheKey key;
     };
 
     std::condition_variable cv;
@@ -128,7 +131,7 @@ private:
     std::atomic<bool> is_thread_exiting{};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
     std::vector<std::thread> worker_threads;
-    std::queue<std::unique_ptr<WorkerParams>> pending_queue;
+    std::queue<WorkerParams> pending_queue;
     std::vector<Result> finished_work;
     Core::Frontend::EmuWindow& emu_window;
 };
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index a793c803d..846a30586 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -92,7 +92,7 @@
        <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
-       <string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string>
+       <string>Use asynchronous shader building (experimental)</string>
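
Note (reviewer illustration appended after the patch, not part of the change
itself): the new worker-count heuristic in vk_rasterizer.cpp reduces to the
small, self-contained C++ sketch below. The helper name is made up for the
example; only the arithmetic mirrors the patched code.

    #include <algorithm>
    #include <cstdint>
    #include <thread>

    // Roughly a quarter of the hardware threads, clamped to the range [1, 4].
    std::uint32_t ChooseAsyncShaderWorkerCount() {
        constexpr std::uint32_t MAX_THREADS = 4;
        // hardware_concurrency() may legally return 0; the division then yields 0
        // and the std::max below still guarantees a single worker thread.
        const std::uint32_t threads_used = std::thread::hardware_concurrency() / 4;
        const std::uint32_t max_worker_count = std::max(std::uint32_t{1}, threads_used);
        return std::min(max_worker_count, MAX_THREADS);
    }

On an 8-thread CPU this yields two async shader workers; at 16 threads it caps
out at MAX_THREADS, matching the count passed to
async_shaders.AllocateWorkers(worker_count) in the RasterizerVulkan constructor.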