From 74f683787eeba7b6e8f5868134f445240733f8fd Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Mon, 14 Jun 2021 21:06:29 -0400 Subject: [PATCH] gl_shader_cache: Implement async shaders --- src/video_core/CMakeLists.txt | 1 + .../renderer_opengl/gl_graphics_pipeline.cpp | 137 ++++++++++-------- .../renderer_opengl/gl_graphics_pipeline.h | 14 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- .../renderer_opengl/gl_shader_cache.cpp | 54 ++++--- .../renderer_opengl/gl_shader_cache.h | 34 ++--- .../renderer_opengl/gl_shader_context.h | 33 +++++ 7 files changed, 161 insertions(+), 114 deletions(-) create mode 100644 src/video_core/renderer_opengl/gl_shader_context.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 1ef3a6189..007ecc13e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -83,6 +83,7 @@ add_library(video_core STATIC renderer_opengl/gl_shader_cache.h renderer_opengl/gl_shader_manager.cpp renderer_opengl/gl_shader_manager.h + renderer_opengl/gl_shader_context.h renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_shader_util.h renderer_opengl/gl_state_tracker.cpp diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index d64723d6b..d27a3cf46 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -6,11 +6,13 @@ #include #include "common/cityhash.h" +#include "common/thread_worker.h" #include "shader_recompiler/shader_info.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_state_tracker.h" +#include "video_core/shader_notify.h" #include "video_core/texture_cache/texture_cache.h" namespace OpenGL { @@ -117,74 +119,91 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, + VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state) : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} { + if (shader_notify) { + shader_notify->MarkShaderBuilding(); + } std::ranges::transform(infos, stage_infos.begin(), [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); - if (device.UseAssemblyShaders()) { - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{assembly_sources[stage]}; - if (code.empty()) { - continue; + auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable { + if (device.UseAssemblyShaders()) { + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); + enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; } - assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage)); - enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage; + } else { + program.handle = glCreateProgram(); + for (size_t stage = 0; stage < 5; ++stage) { + const auto code{sources[stage]}; + if (code.empty()) { + continue; + } + AttachShader(Stage(stage), program.handle, code); + } + LinkProgram(program.handle); } + if (shader_notify) { + shader_notify->MarkShaderComplete(); + } + u32 num_textures{}; + u32 num_images{}; + u32 num_storage_buffers{}; + for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { + const auto& info{stage_infos[stage]}; + if (stage < 4) { + base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; + base_storage_bindings[stage + 1] = base_storage_bindings[stage]; + + base_uniform_bindings[stage + 1] += + AccumulateCount(info.constant_buffer_descriptors); + base_storage_bindings[stage + 1] += + AccumulateCount(info.storage_buffers_descriptors); + } + enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; + std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); + + const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; + num_texture_buffers[stage] += num_tex_buffer_bindings; + num_textures += num_tex_buffer_bindings; + + const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; + num_image_buffers[stage] += num_img_buffers_bindings; + num_images += num_img_buffers_bindings; + + num_textures += AccumulateCount(info.texture_descriptors); + num_images += AccumulateCount(info.image_descriptors); + num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); + + writes_global_memory |= std::ranges::any_of( + info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + } + ASSERT(num_textures <= MAX_TEXTURES); + ASSERT(num_images <= MAX_IMAGES); + + const bool assembly_shaders{assembly_programs[0].handle != 0}; + use_storage_buffers = + !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); + writes_global_memory &= !use_storage_buffers; + + if (assembly_shaders && xfb_state) { + GenerateTransformFeedbackState(*xfb_state); + } + is_built.store(true, std::memory_order_relaxed); + }}; + if (thread_worker) { + thread_worker->QueueWork(std::move(func)); } else { - program.handle = glCreateProgram(); - for (size_t stage = 0; stage < 5; ++stage) { - const auto code{glsl_sources[stage]}; - if (code.empty()) { - continue; - } - AttachShader(Stage(stage), program.handle, code); - } - LinkProgram(program.handle); - } - u32 num_textures{}; - u32 num_images{}; - u32 num_storage_buffers{}; - for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) { - const auto& info{stage_infos[stage]}; - if (stage < 4) { - base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; - base_storage_bindings[stage + 1] = base_storage_bindings[stage]; - - base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors); - base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors); - } - enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask; - std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin()); - - const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)}; - num_texture_buffers[stage] += num_tex_buffer_bindings; - num_textures += num_tex_buffer_bindings; - - const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)}; - num_image_buffers[stage] += num_img_buffers_bindings; - num_images += num_img_buffers_bindings; - - num_textures += AccumulateCount(info.texture_descriptors); - num_images += AccumulateCount(info.image_descriptors); - num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors); - - writes_global_memory |= std::ranges::any_of( - info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); - } - ASSERT(num_textures <= MAX_TEXTURES); - ASSERT(num_images <= MAX_IMAGES); - - const bool assembly_shaders{assembly_programs[0].handle != 0}; - use_storage_buffers = - !assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks(); - writes_global_memory &= !use_storage_buffers; - - if (assembly_shaders && xfb_state) { - GenerateTransformFeedbackState(*xfb_state); + func(nullptr); } } diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index dc791be53..58deafd3c 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -20,10 +20,15 @@ namespace OpenGL { +namespace ShaderContext { +struct Context; +} + class Device; class ProgramManager; using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using ShaderWorker = Common::StatefulThreadWorker; struct GraphicsPipelineKey { std::array unique_hashes; @@ -65,8 +70,8 @@ public: BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, ProgramManager& program_manager_, StateTracker& state_tracker_, - std::array assembly_sources, - std::array glsl_sources, + ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify, + std::array sources, const std::array& infos, const VideoCommon::TransformFeedbackState* xfb_state); @@ -82,6 +87,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool IsBuilt() const noexcept { + return is_built.load(std::memory_order::relaxed); + } + private: void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state); @@ -108,6 +117,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + std::atomic_bool is_built{false}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 7513bd071..e3d336f86 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra buffer_cache_runtime(device), buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, - buffer_cache, program_manager, state_tracker), + buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()), query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 3d59d34d7..d082b9f73 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -17,7 +17,6 @@ #include "common/scope_exit.h" #include "common/thread_worker.h" #include "core/core.h" -#include "core/frontend/emu_window.h" #include "shader_recompiler/backend/glasm/emit_glasm.h" #include "shader_recompiler/backend/glsl/emit_glsl.h" #include "shader_recompiler/backend/spirv/emit_spirv.h" @@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; using VideoCommon::SerializePipeline; +using Context = ShaderContext::Context; template auto MakeSpan(Container& container) { @@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs } } // Anonymous namespace -struct ShaderCache::Context { - explicit Context(Core::Frontend::EmuWindow& emu_window) - : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} - - std::unique_ptr gl_context; - Core::Frontend::GraphicsContext::Scoped scoped; - ShaderPools pools; -}; - ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_) + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_) : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_}, - use_asynchronous_shaders{device.UseAsynchronousShaders()}, + shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()}, profile{ .supported_spirv = 0x00010000, @@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading, env_ptrs.push_back(&env); } ctx->pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))}; + auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)}; std::lock_guard lock{state.mutex}; if (pipeline) { graphics_cache.emplace(key, std::move(pipeline)); @@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() { if (is_new) { program = CreateGraphicsPipeline(); } + if (!program || !program->IsBuilt()) { + return nullptr; + } return program.get(); } @@ -339,7 +334,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { GetGraphicsEnvironments(environments, graphics_key.unique_hashes); main_pools.ReleaseContents(); - auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())}; + auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), + use_asynchronous_shaders)}; if (!pipeline || shader_cache_filename.empty()) { return pipeline; } @@ -354,8 +350,8 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline() { } std::unique_ptr ShaderCache::CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs) try { + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); size_t env_index{}; u32 total_storage_buffers{}; @@ -394,8 +390,7 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( std::array infos{}; OGLProgram source_program; - std::array assembly_sources; - std::array glsl_sources; + std::array sources; Shader::Backend::Bindings binding; const bool use_glasm{device.UseAssemblyShaders()}; const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0; @@ -412,14 +407,16 @@ std::unique_ptr ShaderCache::CreateGraphicsPipeline( const auto runtime_info{ MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)}; if (use_glasm) { - assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding); } else { - glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); + sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding); } } + auto* const thread_worker{build_in_parallel ? workers.get() : nullptr}; + VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr}; return std::make_unique( device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker, - assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); + thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr); } catch (Shader::Exception& exception) { LOG_ERROR(Render_OpenGL, "{}", exception.what()); @@ -442,9 +439,9 @@ std::unique_ptr ShaderCache::CreateComputePipeline( return pipeline; } -std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& pools, - const ComputePipelineKey& key, - Shader::Environment& env) try { +std::unique_ptr ShaderCache::CreateComputePipeline( + ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, + Shader::Environment& env) try { LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; @@ -465,11 +462,10 @@ std::unique_ptr ShaderCache::CreateComputePipeline(ShaderPools& return nullptr; } -std::unique_ptr> ShaderCache::CreateWorkers() - const { - return std::make_unique>( - std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder", - [this] { return Context{emu_window}; }); +std::unique_ptr ShaderCache::CreateWorkers() const { + return std::make_unique(std::max(std::thread::hardware_concurrency(), 2U) - 1, + "yuzu:ShaderBuilder", + [this] { return Context{emu_window}; }); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index e0c5a06d8..d24b54d90 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -13,13 +13,12 @@ #include "common/common_types.h" #include "common/thread_worker.h" -#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/value.h" -#include "shader_recompiler/frontend/maxwell/control_flow.h" #include "shader_recompiler/object_pool.h" #include "video_core/engines/shader_type.h" #include "video_core/renderer_opengl/gl_compute_pipeline.h" #include "video_core/renderer_opengl/gl_graphics_pipeline.h" +#include "video_core/renderer_opengl/gl_shader_context.h" #include "video_core/shader_cache.h" namespace Tegra { @@ -31,29 +30,17 @@ namespace OpenGL { class Device; class ProgramManager; class RasterizerOpenGL; - -struct ShaderPools { - void ReleaseContents() { - flow_block.ReleaseContents(); - block.ReleaseContents(); - inst.ReleaseContents(); - } - - Shader::ObjectPool inst; - Shader::ObjectPool block; - Shader::ObjectPool flow_block; -}; +using ShaderWorker = Common::StatefulThreadWorker; class ShaderCache : public VideoCommon::ShaderCache { - struct Context; - public: explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::Engines::KeplerCompute& kepler_compute_, Tegra::MemoryManager& gpu_memory_, const Device& device_, TextureCache& texture_cache_, BufferCache& buffer_cache_, - ProgramManager& program_manager_, StateTracker& state_tracker_); + ProgramManager& program_manager_, StateTracker& state_tracker_, + VideoCore::ShaderNotify& shader_notify_); ~ShaderCache(); void LoadDiskResources(u64 title_id, std::stop_token stop_loading, @@ -67,17 +54,17 @@ private: std::unique_ptr CreateGraphicsPipeline(); std::unique_ptr CreateGraphicsPipeline( - ShaderPools& pools, const GraphicsPipelineKey& key, - std::span envs); + ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key, + std::span envs, bool build_in_parallel); std::unique_ptr CreateComputePipeline(const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader); - std::unique_ptr CreateComputePipeline(ShaderPools& pools, + std::unique_ptr CreateComputePipeline(ShaderContext::ShaderPools& pools, const ComputePipelineKey& key, Shader::Environment& env); - std::unique_ptr> CreateWorkers() const; + std::unique_ptr CreateWorkers() const; Core::Frontend::EmuWindow& emu_window; const Device& device; @@ -85,17 +72,18 @@ private: BufferCache& buffer_cache; ProgramManager& program_manager; StateTracker& state_tracker; + VideoCore::ShaderNotify& shader_notify; GraphicsPipelineKey graphics_key{}; const bool use_asynchronous_shaders; - ShaderPools main_pools; + ShaderContext::ShaderPools main_pools; std::unordered_map> graphics_cache; std::unordered_map> compute_cache; Shader::Profile profile; std::filesystem::path shader_cache_filename; - std::unique_ptr> workers; + std::unique_ptr workers; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_context.h b/src/video_core/renderer_opengl/gl_shader_context.h new file mode 100644 index 000000000..6ff34e5d6 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_shader_context.h @@ -0,0 +1,33 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "core/frontend/emu_window.h" +#include "shader_recompiler/frontend/ir/basic_block.h" +#include "shader_recompiler/frontend/maxwell/control_flow.h" + +namespace OpenGL::ShaderContext { +struct ShaderPools { + void ReleaseContents() { + flow_block.ReleaseContents(); + block.ReleaseContents(); + inst.ReleaseContents(); + } + + Shader::ObjectPool inst; + Shader::ObjectPool block; + Shader::ObjectPool flow_block; +}; + +struct Context { + explicit Context(Core::Frontend::EmuWindow& emu_window) + : gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {} + + std::unique_ptr gl_context; + Core::Frontend::GraphicsContext::Scoped scoped; + ShaderPools pools; +}; + +} // namespace OpenGL::ShaderContext