From a94297922b33146ccc66e3380cbb2b0ce12fe7ab Mon Sep 17 00:00:00 2001 From: GPUCode <47210458+GPUCode@users.noreply.github.com> Date: Tue, 22 Aug 2023 04:12:46 +0300 Subject: [PATCH] shader_interpreter: Fix control flow edge cases and implement break/breakc (#6844) --- externals/boost | 2 +- src/video_core/shader/shader_interpreter.cpp | 198 ++++++++++++------- 2 files changed, 128 insertions(+), 72 deletions(-) diff --git a/externals/boost b/externals/boost index 700ae2eff..3c27c785a 160000 --- a/externals/boost +++ b/externals/boost @@ -1 +1 @@ -Subproject commit 700ae2eff3134792f09cea2b051666688b1d5b97 +Subproject commit 3c27c785ad0f8a742af02e620dc225673f3a12d8 diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 681817686..cbdbfa2a8 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include "common/assert.h" @@ -26,32 +27,64 @@ using nihstro::SwizzlePattern; namespace Pica::Shader { +struct IfStackElement { + u32 else_address; + u32 end_address; +}; + struct CallStackElement { - u32 final_address; // Address upon which we jump to return_address - u32 return_address; // Where to jump when leaving scope - u8 repeat_counter; // How often to repeat until this call stack element is removed - u8 loop_increment; // Which value to add to the loop counter after an iteration - // TODO: Should this be a signed value? Does it even matter? - u32 loop_address; // The address where we'll return to after each loop iteration + u32 end_address; + u32 return_address; +}; + +struct LoopStackElement { + u32 entry_address; + u32 end_address; + u8 loop_downcounter; + u8 address_increment; + u8 previous_aL; }; template static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData& debug_data, - unsigned offset) { - // TODO: Is there a maximal size for this? - boost::container::static_vector call_stack; - u32 program_counter = offset; + unsigned entry_point) { + boost::circular_buffer if_stack(8); + boost::circular_buffer call_stack(4); + boost::circular_buffer loop_stack(4); + u32 program_counter = entry_point; state.conditional_code[0] = false; state.conditional_code[1] = false; - auto call = [&program_counter, &call_stack](u32 offset, u32 num_instructions, u32 return_offset, - u8 repeat_count, u8 loop_increment) { - // -1 to make sure when incrementing the PC we end up at the correct offset - program_counter = offset - 1; - ASSERT(call_stack.size() < call_stack.capacity()); - call_stack.push_back( - {offset + num_instructions, return_offset, repeat_count, loop_increment, offset}); + const auto do_if = [&](Instruction instr, bool condition) { + if (condition) { + if_stack.push_back({ + .else_address = instr.flow_control.dest_offset, + .end_address = instr.flow_control.dest_offset + instr.flow_control.num_instructions, + }); + } else { + program_counter = instr.flow_control.dest_offset - 1; + } + }; + + const auto do_call = [&](Instruction instr) { + call_stack.push_back({ + .end_address = instr.flow_control.dest_offset + instr.flow_control.num_instructions, + .return_address = program_counter + 1, + }); + program_counter = instr.flow_control.dest_offset - 1; + }; + + const auto do_loop = [&](Instruction instr, const Common::Vec4& loop_param) { + const u8 previous_aL = static_cast(state.address_registers[2]); + loop_stack.push_back({ + .entry_address = program_counter + 1, + .end_address = instr.flow_control.dest_offset + 1, + .loop_downcounter = loop_param.x, + .address_increment = loop_param.z, + .previous_aL = previous_aL, + }); + state.address_registers[2] = loop_param.y; }; auto evaluate_condition = [&state](Instruction::FlowControlType flow_control) { @@ -82,25 +115,11 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData // Placeholder for invalid inputs static f24 dummy_vec4_float24[4]; - unsigned iteration = 0; - bool exit_loop = false; - while (!exit_loop) { - if (!call_stack.empty()) { - auto& top = call_stack.back(); - if (program_counter == top.final_address) { - state.address_registers[2] += top.loop_increment; - - if (top.repeat_counter-- == 0) { - program_counter = top.return_address; - call_stack.pop_back(); - } else { - program_counter = top.loop_address; - } - - // TODO: Is "trying again" accurate to hardware? - continue; - } - } + u32 iteration = 0; + bool should_stop = false; + while (!should_stop) { + bool is_break = false; + const u32 old_program_counter = program_counter; const Instruction instr = {program_code[program_counter]}; const SwizzlePattern swizzle = {swizzle_data[instr.common.operand_desc_id]}; @@ -538,7 +557,7 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData // Handle each instruction on its own switch (instr.opcode.Value()) { case OpCode::Id::END: - exit_loop = true; + should_stop = true; break; case OpCode::Id::JMPC: @@ -559,72 +578,68 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData break; case OpCode::Id::CALL: - call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + do_call(instr); break; case OpCode::Id::CALLU: Record( debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + do_call(instr); } break; case OpCode::Id::CALLC: Record(debug_data, iteration, state.conditional_code); if (evaluate_condition(instr.flow_control)) { - call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, - program_counter + 1, 0, 0); + do_call(instr); } break; case OpCode::Id::NOP: break; - case OpCode::Id::IFU: + case OpCode::Id::IFU: { Record( debug_data, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); - if (uniforms.b[instr.flow_control.bool_uniform_id]) { - call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, - 0); - } else { - call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, - 0); - } - + const bool cond = uniforms.b[instr.flow_control.bool_uniform_id]; + do_if(instr, cond); break; + } case OpCode::Id::IFC: { // TODO: Do we need to consider swizzlers here? - Record(debug_data, iteration, state.conditional_code); - if (evaluate_condition(instr.flow_control)) { - call(program_counter + 1, instr.flow_control.dest_offset - program_counter - 1, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, - 0); - } else { - call(instr.flow_control.dest_offset, instr.flow_control.num_instructions, - instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, - 0); - } - + const bool cond = evaluate_condition(instr.flow_control); + do_if(instr, cond); break; } case OpCode::Id::LOOP: { - Common::Vec4 loop_param(uniforms.i[instr.flow_control.int_uniform_id].x, - uniforms.i[instr.flow_control.int_uniform_id].y, - uniforms.i[instr.flow_control.int_uniform_id].z, - uniforms.i[instr.flow_control.int_uniform_id].w); + const Common::Vec4& loop_param = uniforms.i[instr.flow_control.int_uniform_id]; state.address_registers[2] = loop_param.y; Record(debug_data, iteration, loop_param); - call(program_counter + 1, instr.flow_control.dest_offset - program_counter, - instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); + do_loop(instr, loop_param); + Record(debug_data, iteration, + state.address_registers); + break; + } + + case OpCode::Id::BREAK: { + is_break = true; + Record(debug_data, iteration, + state.address_registers); + break; + } + + case OpCode::Id::BREAKC: { + Record(debug_data, iteration, state.conditional_code); + if (evaluate_condition(instr.flow_control)) { + is_break = true; + } + Record(debug_data, iteration, + state.address_registers); break; } @@ -657,6 +672,47 @@ static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData ++program_counter; ++iteration; + + // Stacks are checked in the order CALL -> IF -> LOOP. The CALL stack + // can be popped multiple times per instruction. A JMP at the end of a + // scope is never taken, this is why we compare against + // old_program_counter + 1 here. + u32 next_program_counter = old_program_counter + 1; + for (u32 i = 0; i < 4; i++) { + if (call_stack.empty() || call_stack.back().end_address != next_program_counter) + break; + // Hardware bug: when popping four CALL scopes at once, the last + // one doesn't update the program counter + if (i < 3) { + program_counter = call_stack.back().return_address; + next_program_counter = program_counter; + } + call_stack.pop_back(); + } + + // The other two stacks can only pop one entry per instruction. They + // are checked against the original program counter before any CALL + // scopes were closed and they overwrite any previous program counter + // updates. + if (!if_stack.empty() && if_stack.back().else_address == old_program_counter + 1) { + program_counter = if_stack.back().end_address; + if_stack.pop_back(); + } + + if (!loop_stack.empty() && + (loop_stack.back().end_address == old_program_counter + 1 || is_break)) { + auto& loop = loop_stack.back(); + state.address_registers[2] += loop.address_increment; + if (!is_break && loop.loop_downcounter--) { + program_counter = loop.entry_address; + } else { + program_counter = loop.end_address; + // Only restore previous value if there is a surrounding LOOP scope. + if (loop_stack.size() > 1) + state.address_registers[2] = loop.previous_aL; + loop_stack.pop_back(); + } + } } }