yuzu-emu
/
yuzu
Archived
1
0
Fork 0

gl_shader_gen: Implement dual vertex shader mode.

- When the VertexA shader stage is enabled, we combine it with the VertexB program to make a single vertex shader stage.
This commit is contained in:
bunnei 2018-07-12 22:25:03 -04:00
parent ce23ae3ede
commit c4015cd93a
5 changed files with 139 additions and 55 deletions

View File

@ -181,6 +181,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
return {array_ptr, buffer_offset}; return {array_ptr, buffer_offset};
} }
/// Reads the code of the given Maxwell shader program out of emulated memory.
///
/// The program's GPU address is the shader code base address plus the offset stored in the
/// program's shader_config entry; it is translated to a CPU address before the block read.
///
/// @param program Shader program slot whose code should be fetched.
/// @return A ProgramCode buffer filled by Memory::ReadBlock.
/// @throws boost::bad_optional_access if GpuToCpuAddress returns an empty optional.
static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) {
    auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();

    // Locate the program: code base address plus this program's configured offset
    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
    const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
    const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};

    // Fetch program code from memory. value() is used rather than operator*, because
    // dereferencing an empty optional is undefined behaviour while value() fails loudly.
    GLShader::ProgramCode program_code;
    Memory::ReadBlock(cpu_address.value(), program_code.data(),
                      program_code.size() * sizeof(u64));
    return program_code;
}
void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
// Helper function for uploading uniform data // Helper function for uploading uniform data
const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
@ -193,18 +206,17 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
}; };
auto& gpu = Core::System().GetInstance().GPU().Maxwell3D(); auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_constbuffer_bindpoint = uniform_buffers.size();
u32 current_texture_bindpoint = 0; u32 current_texture_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
auto& shader_config = gpu.regs.shader_config[index]; auto& shader_config = gpu.regs.shader_config[index];
const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
const auto& stage = index - 1; // Stage indices are 0 - 5 const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage)); const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
buffer_ptr += sizeof(GLShader::MaxwellUniformData); buffer_ptr += sizeof(GLShader::MaxwellUniformData);
buffer_offset += sizeof(GLShader::MaxwellUniformData); buffer_offset += sizeof(GLShader::MaxwellUniformData);
// Fetch program code from memory GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
GLShader::ProgramCode program_code;
const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
GLShader::ShaderSetup setup{std::move(program_code)};
GLShader::ShaderEntries shader_resources; GLShader::ShaderEntries shader_resources;
switch (program) { switch (program) {
case Maxwell::ShaderProgram::VertexA: {
// VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
// Conventional HW does not support this, so we combine VertexA and VertexB into one
// stage here.
setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
GLShader::MaxwellVSConfig vs_config{setup};
shader_resources =
shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
break;
}
case Maxwell::ShaderProgram::VertexB: { case Maxwell::ShaderProgram::VertexB: {
GLShader::MaxwellVSConfig vs_config{setup}; GLShader::MaxwellVSConfig vs_config{setup};
shader_resources = shader_resources =
@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
current_texture_bindpoint = current_texture_bindpoint =
SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program, SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
current_texture_bindpoint, shader_resources.texture_samplers); current_texture_bindpoint, shader_resources.texture_samplers);
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
// VertexB was combined with VertexA, so we skip the VertexB iteration
index++;
}
} }
shader_program_manager->UseTrivialGeometryShader(); shader_program_manager->UseTrivialGeometryShader();

View File

@ -42,11 +42,12 @@ enum class ExitMethod {
struct Subroutine { struct Subroutine {
/// Generates a name suitable for GLSL source code. /// Generates a name suitable for GLSL source code.
std::string GetName() const { std::string GetName() const {
return "sub_" + std::to_string(begin) + '_' + std::to_string(end); return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
} }
u32 begin; ///< Entry point of the subroutine. u32 begin; ///< Entry point of the subroutine.
u32 end; ///< Return point of the subroutine. u32 end; ///< Return point of the subroutine.
const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
ExitMethod exit_method; ///< Exit method of the subroutine. ExitMethod exit_method; ///< Exit method of the subroutine.
std::set<u32> labels; ///< Addresses refereced by JMP instructions. std::set<u32> labels; ///< Addresses refereced by JMP instructions.
@ -58,11 +59,11 @@ struct Subroutine {
/// Analyzes shader code and produces a set of subroutines. /// Analyzes shader code and produces a set of subroutines.
class ControlFlowAnalyzer { class ControlFlowAnalyzer {
public: public:
ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset) ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
: program_code(program_code) { : program_code(program_code) {
// Recursively finds all subroutines. // Recursively finds all subroutines.
const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END); const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
if (program_main.exit_method != ExitMethod::AlwaysEnd) if (program_main.exit_method != ExitMethod::AlwaysEnd)
throw DecompileFail("Program does not always end"); throw DecompileFail("Program does not always end");
} }
@ -77,12 +78,12 @@ private:
std::map<std::pair<u32, u32>, ExitMethod> exit_method_map; std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
/// Adds and analyzes a new subroutine if it is not added yet. /// Adds and analyzes a new subroutine if it is not added yet.
const Subroutine& AddSubroutine(u32 begin, u32 end) { const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
auto iter = subroutines.find(Subroutine{begin, end}); auto iter = subroutines.find(Subroutine{begin, end, suffix});
if (iter != subroutines.end()) if (iter != subroutines.end())
return *iter; return *iter;
Subroutine subroutine{begin, end}; Subroutine subroutine{begin, end, suffix};
subroutine.exit_method = Scan(begin, end, subroutine.labels); subroutine.exit_method = Scan(begin, end, subroutine.labels);
if (subroutine.exit_method == ExitMethod::Undetermined) if (subroutine.exit_method == ExitMethod::Undetermined)
throw DecompileFail("Recursive function detected"); throw DecompileFail("Recursive function detected");
@ -191,7 +192,8 @@ public:
UnsignedInteger, UnsignedInteger,
}; };
GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {} GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix)
: index{index}, shader{shader}, suffix{suffix} {}
/// Gets the GLSL type string for a register /// Gets the GLSL type string for a register
static std::string GetTypeString(Type type) { static std::string GetTypeString(Type type) {
@ -216,7 +218,7 @@ public:
/// Returns a GLSL string representing the current state of the register /// Returns a GLSL string representing the current state of the register
const std::string GetActiveString() { const std::string GetActiveString() {
declr_type.insert(active_type); declr_type.insert(active_type);
return GetPrefixString(active_type) + std::to_string(index); return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix;
} }
/// Returns true if the active type is a float /// Returns true if the active type is a float
@ -251,6 +253,7 @@ private:
ShaderWriter& shader; ShaderWriter& shader;
Type active_type{Type::Float}; Type active_type{Type::Float};
std::set<Type> declr_type; std::set<Type> declr_type;
const std::string& suffix;
}; };
/** /**
@ -262,8 +265,8 @@ private:
class GLSLRegisterManager { class GLSLRegisterManager {
public: public:
GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations, GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
const Maxwell3D::Regs::ShaderStage& stage) const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
: shader{shader}, declarations{declarations}, stage{stage} { : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
BuildRegisterList(); BuildRegisterList();
} }
@ -430,12 +433,12 @@ public:
} }
/// Add declarations for registers /// Add declarations for registers
void GenerateDeclarations() { void GenerateDeclarations(const std::string& suffix) {
for (const auto& reg : regs) { for (const auto& reg : regs) {
for (const auto& type : reg.DeclaredTypes()) { for (const auto& type : reg.DeclaredTypes()) {
declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' + declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' +
GLSLRegister::GetPrefixString(type) + reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) +
std::to_string(reg.GetIndex()) + " = 0;"); '_' + suffix + " = 0;");
} }
} }
declarations.AddNewLine(); declarations.AddNewLine();
@ -558,7 +561,7 @@ private:
/// Build the GLSL register list. /// Build the GLSL register list.
void BuildRegisterList() { void BuildRegisterList() {
for (size_t index = 0; index < Register::NumRegisters; ++index) { for (size_t index = 0; index < Register::NumRegisters; ++index) {
regs.emplace_back(index, shader); regs.emplace_back(index, shader, suffix);
} }
} }
@ -620,16 +623,17 @@ private:
std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers; std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
std::vector<SamplerEntry> used_samplers; std::vector<SamplerEntry> used_samplers;
const Maxwell3D::Regs::ShaderStage& stage; const Maxwell3D::Regs::ShaderStage& stage;
const std::string& suffix;
}; };
class GLSLGenerator { class GLSLGenerator {
public: public:
GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code, GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
u32 main_offset, Maxwell3D::Regs::ShaderStage stage) u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
: subroutines(subroutines), program_code(program_code), main_offset(main_offset), : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
stage(stage) { stage(stage), suffix(suffix) {
Generate(); Generate(suffix);
} }
std::string GetShaderCode() { std::string GetShaderCode() {
@ -644,7 +648,7 @@ public:
private: private:
/// Gets the Subroutine object corresponding to the specified address. /// Gets the Subroutine object corresponding to the specified address.
const Subroutine& GetSubroutine(u32 begin, u32 end) const { const Subroutine& GetSubroutine(u32 begin, u32 end) const {
auto iter = subroutines.find(Subroutine{begin, end}); auto iter = subroutines.find(Subroutine{begin, end, suffix});
ASSERT(iter != subroutines.end()); ASSERT(iter != subroutines.end());
return *iter; return *iter;
} }
@ -689,7 +693,7 @@ private:
// Can't assign to the constant predicate. // Can't assign to the constant predicate.
ASSERT(pred != static_cast<u64>(Pred::UnusedIndex)); ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
std::string variable = 'p' + std::to_string(pred); std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
shader.AddLine(variable + " = " + value + ';'); shader.AddLine(variable + " = " + value + ';');
declr_predicates.insert(std::move(variable)); declr_predicates.insert(std::move(variable));
} }
@ -707,7 +711,7 @@ private:
if (index == static_cast<u64>(Pred::UnusedIndex)) if (index == static_cast<u64>(Pred::UnusedIndex))
variable = "true"; variable = "true";
else else
variable = 'p' + std::to_string(index); variable = 'p' + std::to_string(index) + '_' + suffix;
if (negate) { if (negate) {
return "!(" + variable + ')'; return "!(" + variable + ')';
@ -1728,7 +1732,7 @@ private:
return program_counter; return program_counter;
} }
void Generate() { void Generate(const std::string& suffix) {
// Add declarations for all subroutines // Add declarations for all subroutines
for (const auto& subroutine : subroutines) { for (const auto& subroutine : subroutines) {
shader.AddLine("bool " + subroutine.GetName() + "();"); shader.AddLine("bool " + subroutine.GetName() + "();");
@ -1736,7 +1740,7 @@ private:
shader.AddNewLine(); shader.AddNewLine();
// Add the main entry point // Add the main entry point
shader.AddLine("bool exec_shader() {"); shader.AddLine("bool exec_" + suffix + "() {");
++shader.scope; ++shader.scope;
CallSubroutine(GetSubroutine(main_offset, PROGRAM_END)); CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
--shader.scope; --shader.scope;
@ -1799,7 +1803,7 @@ private:
/// Add declarations for registers /// Add declarations for registers
void GenerateDeclarations() { void GenerateDeclarations() {
regs.GenerateDeclarations(); regs.GenerateDeclarations(suffix);
for (const auto& pred : declr_predicates) { for (const auto& pred : declr_predicates) {
declarations.AddLine("bool " + pred + " = false;"); declarations.AddLine("bool " + pred + " = false;");
@ -1812,27 +1816,30 @@ private:
const ProgramCode& program_code; const ProgramCode& program_code;
const u32 main_offset; const u32 main_offset;
Maxwell3D::Regs::ShaderStage stage; Maxwell3D::Regs::ShaderStage stage;
const std::string& suffix;
ShaderWriter shader; ShaderWriter shader;
ShaderWriter declarations; ShaderWriter declarations;
GLSLRegisterManager regs{shader, declarations, stage}; GLSLRegisterManager regs{shader, declarations, stage, suffix};
// Declarations // Declarations
std::set<std::string> declr_predicates; std::set<std::string> declr_predicates;
}; // namespace Decompiler }; // namespace Decompiler
std::string GetCommonDeclarations() { std::string GetCommonDeclarations() {
std::string declarations = "bool exec_shader();\n"; std::string declarations;
declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
declarations += '\n';
return declarations; return declarations;
} }
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage) { Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix) {
try { try {
auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines(); auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
GLSLGenerator generator(subroutines, program_code, main_offset, stage); GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
return ProgramResult{generator.GetShaderCode(), generator.GetEntries()}; return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
} catch (const DecompileFail& exception) { } catch (const DecompileFail& exception) {
LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what()); LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());

View File

@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D;
std::string GetCommonDeclarations(); std::string GetCommonDeclarations();
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
Maxwell3D::Regs::ShaderStage stage); Maxwell3D::Regs::ShaderStage stage,
const std::string& suffix);
} // namespace Decompiler } // namespace Decompiler
} // namespace GLShader } // namespace GLShader

View File

@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf
std::string out = "#version 430 core\n"; std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations(); out += Decompiler::GetCommonDeclarations();
out += "bool exec_vertex();\n";
ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, if (setup.IsDualProgram()) {
Maxwell3D::Regs::ShaderStage::Vertex) out += "bool exec_vertex_b();\n";
}
ProgramResult program =
Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
.get_value_or({}); .get_value_or({});
out += R"( out += R"(
out gl_PerVertex { out gl_PerVertex {
@ -34,7 +41,14 @@ layout (std140) uniform vs_config {
}; };
void main() { void main() {
exec_shader(); exec_vertex();
)";
if (setup.IsDualProgram()) {
out += " exec_vertex_b();";
}
out += R"(
// Viewport can be flipped, which is unsupported by glViewport // Viewport can be flipped, which is unsupported by glViewport
position.xy *= viewport_flip.xy; position.xy *= viewport_flip.xy;
@ -44,8 +58,19 @@ void main() {
// For now, this is here to bring order in lieu of proper emulation // For now, this is here to bring order in lieu of proper emulation
position.w = 1.0; position.w = 1.0;
} }
)"; )";
out += program.first; out += program.first;
if (setup.IsDualProgram()) {
ProgramResult program_b =
Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
.get_value_or({});
out += program_b.first;
}
return {out, program.second}; return {out, program.second};
} }
@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
std::string out = "#version 430 core\n"; std::string out = "#version 430 core\n";
out += "#extension GL_ARB_separate_shader_objects : enable\n\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
out += Decompiler::GetCommonDeclarations(); out += Decompiler::GetCommonDeclarations();
out += "bool exec_fragment();\n";
ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET, ProgramResult program =
Maxwell3D::Regs::ShaderStage::Fragment) Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
.get_value_or({}); .get_value_or({});
out += R"( out += R"(
in vec4 position; in vec4 position;
out vec4 color; out vec4 color;
@ -67,7 +93,7 @@ layout (std140) uniform fs_config {
}; };
void main() { void main() {
exec_shader(); exec_fragment();
} }
)"; )";

View File

@ -115,21 +115,48 @@ struct ShaderEntries {
using ProgramResult = std::pair<std::string, ShaderEntries>; using ProgramResult = std::pair<std::string, ShaderEntries>;
struct ShaderSetup { struct ShaderSetup {
ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {} ShaderSetup(const ProgramCode& program_code) {
program.code = program_code;
}
struct {
ProgramCode code;
ProgramCode code_b; // Used for dual vertex shaders
} program;
ProgramCode program_code;
bool program_code_hash_dirty = true; bool program_code_hash_dirty = true;
u64 GetProgramCodeHash() { u64 GetProgramCodeHash() {
if (program_code_hash_dirty) { if (program_code_hash_dirty) {
program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code)); program_code_hash = GetNewHash();
program_code_hash_dirty = false; program_code_hash_dirty = false;
} }
return program_code_hash; return program_code_hash;
} }
/// Used in scenarios where we have a dual vertex shaders
void SetProgramB(const ProgramCode& program_b) {
program.code_b = program_b;
has_program_b = true;
}
/// Returns true once SetProgramB has been called, i.e. when a second program (code_b) has
/// been attached to this setup.
bool IsDualProgram() const {
return has_program_b;
}
private: private:
/// Computes a fresh hash over the program code held by this setup.
///
/// With a second program attached, the whole `program` struct (code and code_b) is hashed;
/// otherwise only the primary code buffer is.
///
/// @return 64-bit hash produced by Common::ComputeHash64.
u64 GetNewHash() const {
    if (has_program_b) {
        // Compute hash over dual shader programs
        return Common::ComputeHash64(&program, sizeof(program));
    }
    // Compute hash over a single shader program. The length is given in bytes via sizeof,
    // matching the dual-program branch; size() is only the element count and would leave
    // most of the buffer out of the hash.
    return Common::ComputeHash64(&program.code, sizeof(program.code));
}
u64 program_code_hash{}; u64 program_code_hash{};
bool has_program_b{};
}; };
struct MaxwellShaderConfigCommon { struct MaxwellShaderConfigCommon {