From 459fce3a8f26241ff2a68c323e75fb70e7e1ba79 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 25 Jun 2019 07:57:32 -0400 Subject: [PATCH] shader_ir: propagate shader size to the IR --- .../renderer_opengl/gl_shader_cache.cpp | 22 +++++++++++++------ .../renderer_opengl/gl_shader_gen.cpp | 8 +++---- .../renderer_opengl/gl_shader_gen.h | 2 ++ src/video_core/shader/decode.cpp | 6 ++--- src/video_core/shader/shader_ir.cpp | 4 ++-- src/video_core/shader/shader_ir.h | 3 ++- 6 files changed, 28 insertions(+), 17 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f9b2b03a0..5d76ee12d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -129,9 +129,11 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { /// Hashes one (or two) program streams u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, - const ProgramCode& code_b) { - u64 unique_identifier = - Common::CityHash64(reinterpret_cast(code.data()), CalculateProgramSize(code)); + const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { + if (size_a == 0) { + size_a = CalculateProgramSize(code); + } + u64 unique_identifier = Common::CityHash64(reinterpret_cast(code.data()), size_a); if (program_type != Maxwell::ShaderProgram::VertexA) { return unique_identifier; } @@ -140,8 +142,11 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& std::size_t seed = 0; boost::hash_combine(seed, unique_identifier); - const u64 identifier_b = Common::CityHash64(reinterpret_cast(code_b.data()), - CalculateProgramSize(code_b)); + if (size_b == 0) { + size_b = CalculateProgramSize(code_b); + } + const u64 identifier_b = + Common::CityHash64(reinterpret_cast(code_b.data()), size_b); boost::hash_combine(seed, identifier_b); return static_cast(seed); } @@ -150,14 +155,17 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, ProgramCode program_code, ProgramCode program_code_b) { GLShader::ShaderSetup setup(program_code); + setup.program.size_a = CalculateProgramSize(program_code); + setup.program.size_b = 0; if (program_type == Maxwell::ShaderProgram::VertexA) { // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders. // Conventional HW does not support this, so we combine VertexA and VertexB into one // stage here. setup.SetProgramB(program_code_b); + setup.program.size_b = CalculateProgramSize(program_code_b); } - setup.program.unique_identifier = - GetUniqueIdentifier(program_type, program_code, program_code_b); + setup.program.unique_identifier = GetUniqueIdentifier( + program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); switch (program_type) { case Maxwell::ShaderProgram::VertexA: diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9148629ec..f9ee8429e 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -29,14 +29,14 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); out += program.first; if (setup.IsDualProgram()) { - const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET); + const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); ProgramResult program_b = Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); @@ -80,7 +80,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); out += program.first; @@ -115,7 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { }; )"; - const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); + const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); ProgramResult program = Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 0536c8a03..7cbc590f8 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -27,6 +27,8 @@ struct ShaderSetup { ProgramCode code; ProgramCode code_b; // Used for dual vertex shaders u64 unique_identifier; + std::size_t size_a; + std::size_t size_b; } program; /// Used in scenarios where we have a dual vertex shaders diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 65029d35e..09f55bd21 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -39,7 +39,7 @@ void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); ShaderCharacteristics shader_info{}; - bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info); if (can_proceed) { coverage_begin = shader_info.start; coverage_end = shader_info.end; @@ -52,12 +52,12 @@ void ShaderIR::Decode() { } return; } - LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); + LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); // Now we need to deal with an undecompilable shader. We need to brute force // a shader that captures every position. coverage_begin = shader_info.start; - const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + const u32 shader_end = static_cast(program_size / sizeof(u64)); coverage_end = shader_end; for (u32 label = main_offset; label < shader_end; label++) { basic_blocks.insert({label, DecodeRange(label, label + 1)}); diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp index 11b545cca..5994bfc4e 100644 --- a/src/video_core/shader/shader_ir.cpp +++ b/src/video_core/shader/shader_ir.cpp @@ -22,8 +22,8 @@ using Tegra::Shader::PredCondition; using Tegra::Shader::PredOperation; using Tegra::Shader::Register; -ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset) - : program_code{program_code}, main_offset{main_offset} { +ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size) + : program_code{program_code}, main_offset{main_offset}, program_size{size} { Decode(); } diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index e71462e02..a67d4f390 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -65,7 +65,7 @@ struct GlobalMemoryUsage { class ShaderIR final { public: - explicit ShaderIR(const ProgramCode& program_code, u32 main_offset); + explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size); ~ShaderIR(); const std::map& GetBasicBlocks() const { @@ -316,6 +316,7 @@ private: const ProgramCode& program_code; const u32 main_offset; + const std::size_t program_size; u32 coverage_begin{}; u32 coverage_end{};