Thread a ConstBufferLocker through the OpenGL shader generators into ShaderIR
and the control-flow scanner (ScanFlow / CFGRebuildState). ShaderParameters
gains a Core::System reference so CreateProgram can build one locker per stage
(Maxwell3D for vertex/geometry/fragment, KeplerCompute for compute). The BRX
tracking log in ParseCode now also prints the const-buffer words read through
the locker, and the tracked entry count becomes the IMNMX immediate plus one.
Two over-long lines in gl_rasterizer.cpp are re-wrapped.

Review notes:
* control_flow.cpp, ParseCode: the added loop called locker.ObtainKey(), but
  ParseCode only receives `state`; the locker added by this patch is a member
  of CFGRebuildState, so the call now goes through state.locker.
* gl_shader_cache.cpp, CreateProgram: the ProgramType::Compute locker was a
  single line of about 120 columns; it is wrapped like the three sibling cases.
  That hunk's new-side count is 26 (was 25) and the new-side start of every
  later hunk in the file is shifted by one.
* NOTE(review): `*k` is dereferenced unchecked in the BRX loop. Confirm that
  ObtainKey cannot return an empty result for a tracked buffer/offset.
* NOTE(review): this text was recovered from a whitespace-collapsed copy.
  Indentation, blank context lines and template arguments on context lines
  (static_cast<u32>, std::vector<BlockInfo>, ShaderCharacteristics,
  std::map<u32, NodeBlock>, std::set<...>) are reconstructed; verify them
  against the target tree before applying. The text after each hunk range is
  informational and is left as received. The post-image hashes on the index
  lines of gl_shader_cache.cpp and control_flow.cpp no longer match.

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 10114909b..9431d64ac 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1006,7 +1006,8 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
             }
             const auto cbuf = entry.GetBindlessCBuf();
             Tegra::Texture::TextureHandle tex_handle;
-            tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
+            tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
+                                                         cbuf.first, cbuf.second);
             return compute.GetTextureInfo(tex_handle, entry.GetOffset());
         }();
 
@@ -1051,7 +1052,8 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
             }
             const auto cbuf = entry.GetBindlessCBuf();
             Tegra::Texture::TextureHandle tex_handle;
-            tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute, cbuf.first, cbuf.second);
+            tex_handle.raw = compute.AccessConstBuffer32(Tegra::Engines::ShaderType::Compute,
+                                                         cbuf.first, cbuf.second);
             return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
         }();
         SetupImage(bindpoint, tic, entry);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..9e2799876 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,6 +10,7 @@
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -173,8 +174,9 @@ u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
 }
 
 /// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
-                                      ProgramCode program_code, ProgramCode program_code_b) {
+GLShader::ProgramResult CreateProgram(Core::System& system, const Device& device,
+                                      ProgramType program_type, ProgramCode program_code,
+                                      ProgramCode program_code_b) {
     GLShader::ShaderSetup setup(program_code);
     setup.program.size_a = CalculateProgramSize(program_code);
     setup.program.size_b = 0;
@@ -190,14 +192,26 @@ GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_
 
     switch (program_type) {
     case ProgramType::VertexA:
-    case ProgramType::VertexB:
-        return GLShader::GenerateVertexShader(device, setup);
-    case ProgramType::Geometry:
-        return GLShader::GenerateGeometryShader(device, setup);
-    case ProgramType::Fragment:
-        return GLShader::GenerateFragmentShader(device, setup);
-    case ProgramType::Compute:
-        return GLShader::GenerateComputeShader(device, setup);
+    case ProgramType::VertexB: {
+        VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Vertex,
+                                                      &(system.GPU().Maxwell3D())};
+        return GLShader::GenerateVertexShader(locker, device, setup);
+    }
+    case ProgramType::Geometry: {
+        VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Geometry,
+                                                      &(system.GPU().Maxwell3D())};
+        return GLShader::GenerateGeometryShader(locker, device, setup);
+    }
+    case ProgramType::Fragment: {
+        VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Fragment,
+                                                      &(system.GPU().Maxwell3D())};
+        return GLShader::GenerateFragmentShader(locker, device, setup);
+    }
+    case ProgramType::Compute: {
+        VideoCommon::Shader::ConstBufferLocker locker{Tegra::Engines::ShaderType::Compute,
+                                                      &(system.GPU().KeplerCompute())};
+        return GLShader::GenerateComputeShader(locker, device, setup);
+    }
     default:
         UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
         return {};
@@ -307,8 +321,8 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
                                            ProgramCode&& program_code_b) {
     const auto code_size{CalculateProgramSize(program_code)};
     const auto code_size_b{CalculateProgramSize(program_code_b)};
-    auto result{
-        CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
+    auto result{CreateProgram(params.system, params.device, GetProgramType(program_type),
+                              program_code, program_code_b)};
     if (result.first.empty()) {
         // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
         return {};
@@ -331,7 +345,7 @@ Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
 }
 
 Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
-    auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+    auto result{CreateProgram(params.system, params.device, ProgramType::Compute, code, {})};
 
     const auto code_size{CalculateProgramSize(code)};
     params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
@@ -566,7 +580,7 @@ std::unordered_map ShaderCacheOpenGL::GenerateUnspecia
             result = {stored_decompiled.code, stored_decompiled.entries};
         } else {
             // Otherwise decompile the shader at boot and save the result to the decompiled file
-            result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
+            result = CreateProgram(system, device, raw.GetProgramType(), raw.GetProgramCode(),
                                    raw.GetProgramCodeB());
             disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
         }
@@ -612,7 +626,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const auto unique_identifier =
         GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
     const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
-    const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+    const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
                                   host_ptr, unique_identifier};
 
     const auto found = precompiled_shaders.find(unique_identifier);
@@ -639,7 +653,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
     const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
-    const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
+    const ShaderParameters params{disk_cache, precompiled_programs, system, device, cpu_addr,
                                   host_ptr, unique_identifier};
 
     const auto found = precompiled_shaders.find(unique_identifier);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6ff78f005 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -45,6 +45,7 @@ using PrecompiledShaders = std::unordered_map;
 struct ShaderParameters {
     ShaderDiskCacheOpenGL& disk_cache;
     const PrecompiledPrograms& precompiled_programs;
+    Core::System& system;
     const Device& device;
     VAddr cpu_addr;
     u8* host_ptr;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index b5a43e79e..817c6e12c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -21,7 +21,8 @@ static constexpr u32 COMPUTE_OFFSET = 0;
 
 static constexpr CompilerSettings settings{CompileDepth::NoFlowStack, true};
 
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
+                                   const ShaderSetup& setup) {
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
 
     std::string out = "// Shader Unique Id: VS" + id + "\n\n";
@@ -35,14 +36,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 
 )";
 
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+                              locker);
     const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
     ProgramResult program = Decompile(device, program_ir, stage, "vertex");
     out += program.first;
 
     if (setup.IsDualProgram()) {
         const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b,
-                                    settings);
+                                    settings, locker);
         ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
         out += program_b.first;
     }
@@ -71,7 +73,8 @@ void main() {
     return {std::move(out), std::move(program.second)};
 }
 
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
+                                     const ShaderSetup& setup) {
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
 
     std::string out = "// Shader Unique Id: GS" + id + "\n\n";
@@ -85,7 +88,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
 
 )";
 
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+                              locker);
     ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
     out += program.first;
 
@@ -97,7 +101,8 @@ void main() {
     return {std::move(out), std::move(program.second)};
 }
 
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
+                                     const ShaderSetup& setup) {
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
 
     std::string out = "// Shader Unique Id: FS" + id + "\n\n";
@@ -120,7 +125,8 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
 
 )";
 
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings);
+    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a, settings,
+                              locker);
     ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
     out += program.first;
 
@@ -133,13 +139,15 @@ void main() {
     return {std::move(out), std::move(program.second)};
 }
 
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
+ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
+                                    const ShaderSetup& setup) {
     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
 
     std::string out = "// Shader Unique Id: CS" + id + "\n\n";
     out += GetCommonDeclarations();
 
-    const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings);
+    const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a, settings,
+                              locker);
     ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
     out += program.first;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..05f157298 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -16,6 +16,7 @@ class Device;
 
 namespace OpenGL::GLShader {
 
+using VideoCommon::Shader::ConstBufferLocker;
 using VideoCommon::Shader::ProgramCode;
 
 struct ShaderSetup {
@@ -46,15 +47,19 @@ private:
 };
 
 /// Generates the GLSL vertex shader program source code for the given VS program
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateVertexShader(ConstBufferLocker& locker, const Device& device,
+                                   const ShaderSetup& setup);
 
 /// Generates the GLSL geometry shader program source code for the given GS program
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateGeometryShader(ConstBufferLocker& locker, const Device& device,
+                                     const ShaderSetup& setup);
 
 /// Generates the GLSL fragment shader program source code for the given FS program
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateFragmentShader(ConstBufferLocker& locker, const Device& device,
+                                     const ShaderSetup& setup);
 
 /// Generates the GLSL compute shader program source code for the given CS program
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
+ProgramResult GenerateComputeShader(ConstBufferLocker& locker, const Device& device,
+                                    const ShaderSetup& setup);
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 70f758642..dac2e4272 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -57,8 +57,8 @@ struct BlockInfo {
 
 struct CFGRebuildState {
     explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
-                             const u32 start)
-        : start{start}, program_code{program_code}, program_size{program_size} {}
+                             const u32 start, ConstBufferLocker& locker)
+        : start{start}, program_code{program_code}, program_size{program_size}, locker{locker} {}
 
     u32 start{};
     std::vector<BlockInfo> block_info{};
@@ -72,6 +72,7 @@ struct CFGRebuildState {
     const ProgramCode& program_code;
     const std::size_t program_size;
     ASTManager* manager;
+    ConstBufferLocker& locker;
 };
 
 enum class BlockCollision : u32 { None, Found, Inside };
@@ -214,7 +215,7 @@ std::optional TrackBranchIndirectInfo(const CFGRebuildState&
         if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
             if (instr.gpr0.Value() == track_register) {
                 track_register = instr.gpr8.Value();
-                result.entries = instr.alu.GetSignedImm20_20();
+                result.entries = instr.alu.GetSignedImm20_20() + 1;
                 pos--;
                 found_track = true;
                 break;
@@ -406,8 +407,14 @@ std::pair ParseCode(CFGRebuildState& state, u32 address)
             auto tmp = TrackBranchIndirectInfo(state, address, offset);
             if (tmp) {
                 auto result = *tmp;
-                LOG_CRITICAL(HW_GPU, "Track Successful, BRX: buffer:{}, offset:{}, entries:{}",
-                             result.buffer, result.offset, result.entries);
+                std::string entries{};
+                for (u32 i = 0; i < result.entries; i++) {
+                    auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+                    entries = entries + std::to_string(*k) + '\n';
+                }
+                LOG_CRITICAL(HW_GPU,
+                             "Track Successful, BRX: buffer:{}, offset:{}, entries:{}, inner:\n{}",
+                             result.buffer, result.offset, result.entries, entries);
             } else {
                 LOG_CRITICAL(HW_GPU, "Track Unsuccesful");
             }
@@ -588,14 +595,15 @@ void DecompileShader(CFGRebuildState& state) {
 
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
                                                 std::size_t program_size, u32 start_address,
-                                                const CompilerSettings& settings) {
+                                                const CompilerSettings& settings,
+                                                ConstBufferLocker& locker) {
     auto result_out = std::make_unique<ShaderCharacteristics>();
     if (settings.depth == CompileDepth::BruteForce) {
         result_out->settings.depth = CompileDepth::BruteForce;
         return result_out;
     }
 
-    CFGRebuildState state{program_code, program_size, start_address};
+    CFGRebuildState state{program_code, program_size, start_address, locker};
     // Inspect Code and generate blocks
     state.labels.clear();
     state.labels.emplace(start_address);
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index 37e987d62..6d0e50d7c 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -78,6 +78,7 @@ struct ShaderCharacteristics {
 
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
                                                 std::size_t program_size, u32 start_address,
-                                                const CompilerSettings& settings);
+                                                const CompilerSettings& settings,
+                                                ConstBufferLocker& locker);
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 2626b1616..3f87b87ca 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -102,7 +102,7 @@ void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
     decompiled = false;
-    auto info = ScanFlow(program_code, program_size, main_offset, settings);
+    auto info = ScanFlow(program_code, program_size, main_offset, settings, locker);
     auto& shader_info = *info;
     coverage_begin = shader_info.start;
     coverage_end = shader_info.end;
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index c1f2b88c8..6430575ec 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -23,9 +23,9 @@ using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
 ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size,
-                   CompilerSettings settings)
+                   CompilerSettings settings, ConstBufferLocker& locker)
     : program_code{program_code}, main_offset{main_offset}, program_size{size}, basic_blocks{},
-      program_manager{true, true}, settings{settings} {
+      program_manager{true, true}, settings{settings}, locker{locker} {
     Decode();
 }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 68818643c..e3b568d3e 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -68,7 +68,7 @@ struct GlobalMemoryUsage {
 class ShaderIR final {
 public:
     explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size,
-                      CompilerSettings settings);
+                      CompilerSettings settings, ConstBufferLocker& locker);
     ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -389,6 +389,7 @@ private:
     NodeBlock global_code;
     ASTManager program_manager;
     CompilerSettings settings{};
+    ConstBufferLocker& locker;
 
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;