citra-emu
/
citra-canary
Archived
1
0
Fork 0

Merge pull request #1787 from JayFoxRox/refactor-jit

Refactor JIT
This commit is contained in:
linkmauve 2016-05-16 17:54:45 +01:00
commit f40fabd688
6 changed files with 50 additions and 32 deletions

View File

@ -64,6 +64,7 @@ MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs; auto& config = g_state.regs.vs;
auto& setup = g_state.vs;
MICROPROFILE_SCOPE(GPU_Shader); MICROPROFILE_SCOPE(GPU_Shader);
@ -81,11 +82,11 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) if (VideoCore::g_shader_jit_enabled)
jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); jit_shader->Run(setup, state, config.main_offset);
else else
RunInterpreter(state); RunInterpreter(setup, state, config.main_offset);
#else #else
RunInterpreter(state); RunInterpreter(setup, state, config.main_offset);
#endif // ARCHITECTURE_x86_64 #endif // ARCHITECTURE_x86_64
// Setup output data // Setup output data
@ -156,7 +157,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
state.conditional_code[0] = false; state.conditional_code[0] = false;
state.conditional_code[1] = false; state.conditional_code[1] = false;
RunInterpreter(state); RunInterpreter(setup, state, config.main_offset);
return state.debug; return state.debug;
} }

View File

@ -283,10 +283,10 @@ struct UnitState {
static size_t InputOffset(const SourceRegister& reg) { static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) { switch (reg.GetRegisterType()) {
case RegisterType::Input: case RegisterType::Input:
return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary: case RegisterType::Temporary:
return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default: default:
UNREACHABLE(); UNREACHABLE();
@ -297,10 +297,10 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) { static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) { switch (reg.GetRegisterType()) {
case RegisterType::Output: case RegisterType::Output:
return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); return offsetof(UnitState, registers.output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary: case RegisterType::Temporary:
return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default: default:
UNREACHABLE(); UNREACHABLE();
@ -323,6 +323,23 @@ struct ShaderSetup {
std::array<Math::Vec4<u8>, 4> i; std::array<Math::Vec4<u8>, 4> i;
} uniforms; } uniforms;
/// Computes the byte offset of a single uniform slot, measured from the start of a ShaderSetup.
/// @param type  Uniform bank to address: FloatUniform, BoolUniform or IntUniform.
/// @param index Slot number inside the selected bank; it is not range-checked here.
/// @return Offset of the requested slot. Any other register type hits UNREACHABLE() and yields 0.
static size_t UniformOffset(RegisterType type, unsigned index) {
    size_t bank_base = 0;    // offset of the first slot of the selected bank
    size_t slot_stride = 0;  // size in bytes of one slot in that bank

    switch (type) {
    case RegisterType::FloatUniform:
        bank_base = offsetof(ShaderSetup, uniforms.f);
        slot_stride = sizeof(Math::Vec4<float24>);
        break;

    case RegisterType::BoolUniform:
        bank_base = offsetof(ShaderSetup, uniforms.b);
        slot_stride = sizeof(bool);
        break;

    case RegisterType::IntUniform:
        bank_base = offsetof(ShaderSetup, uniforms.i);
        slot_stride = sizeof(Math::Vec4<u8>);
        break;

    default:
        // Only the three uniform banks live in ShaderSetup; nothing else can be addressed here.
        UNREACHABLE();
        return 0;
    }

    return bank_base + index * slot_stride;
}
std::array<u32, 1024> program_code; std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data; std::array<u32, 1024> swizzle_data;

View File

@ -41,11 +41,11 @@ struct CallStackElement {
}; };
template<bool Debug> template<bool Debug>
void RunInterpreter(UnitState<Debug>& state) { void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
// TODO: Is there a maximal size for this? // TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack; boost::container::static_vector<CallStackElement, 16> call_stack;
u32 program_counter = g_state.regs.vs.main_offset; u32 program_counter = offset;
const auto& uniforms = g_state.vs.uniforms; const auto& uniforms = g_state.vs.uniforms;
const auto& swizzle_data = g_state.vs.swizzle_data; const auto& swizzle_data = g_state.vs.swizzle_data;
@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) {
} }
// Explicit instantiation // Explicit instantiation
template void RunInterpreter(UnitState<false>& state); template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
template void RunInterpreter(UnitState<true>& state); template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
} // namespace } // namespace

View File

@ -11,7 +11,7 @@ namespace Shader {
template <bool Debug> struct UnitState; template <bool Debug> struct UnitState;
template<bool Debug> template<bool Debug>
void RunInterpreter(UnitState<Debug>& state); void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
} // namespace } // namespace

View File

@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
// purposes, as documented below: // purposes, as documented below:
/// Pointer to the uniform memory /// Pointer to the uniform memory
static const X64Reg UNIFORMS = R9; static const X64Reg SETUP = R9;
/// The two 32-bit VS address offset registers set by the MOVA instruction /// The two 32-bit VS address offset registers set by the MOVA instruction
static const X64Reg ADDROFFS_REG_0 = R10; static const X64Reg ADDROFFS_REG_0 = R10;
static const X64Reg ADDROFFS_REG_1 = R11; static const X64Reg ADDROFFS_REG_1 = R11;
@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
/// Result of the previous CMP instruction for the Y-component comparison /// Result of the previous CMP instruction for the Y-component comparison
static const X64Reg COND1 = R14; static const X64Reg COND1 = R14;
/// Pointer to the UnitState instance for the current VS unit /// Pointer to the UnitState instance for the current VS unit
static const X64Reg REGISTERS = R15; static const X64Reg STATE = R15;
/// SIMD scratch register /// SIMD scratch register
static const X64Reg SCRATCH = XMM0; static const X64Reg SCRATCH = XMM0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
// State registers that must not be modified by external functions calls // State registers that must not be modified by external functions calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = { static const BitSet32 persistent_regs = {
UNIFORMS, REGISTERS, // Pointers to register blocks SETUP, STATE, // Pointers to register blocks
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
ONE+16, NEGBIT+16, // Constants ONE+16, NEGBIT+16, // Constants
}; };
@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
size_t src_offset; size_t src_offset;
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
src_ptr = UNIFORMS; src_ptr = SETUP;
src_offset = src_reg.GetIndex() * sizeof(float24) * 4; src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
} else { } else {
src_ptr = REGISTERS; src_ptr = STATE;
src_offset = UnitState<false>::InputOffset(src_reg); src_offset = UnitState<false>::InputOffset(src_reg);
} }
@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
// If all components are enabled, write the result to the destination register // If all components are enabled, write the result to the destination register
if (swiz.dest_mask == NO_DEST_REG_MASK) { if (swiz.dest_mask == NO_DEST_REG_MASK) {
// Store dest back to memory // Store dest back to memory
MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); MOVAPS(MDisp(STATE, dest_offset_disp), src);
} else { } else {
// Not all components are enabled, so mask the result when storing to the destination register... // Not all components are enabled, so mask the result when storing to the destination register...
MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
if (Common::GetCPUCaps().sse4_1) { if (Common::GetCPUCaps().sse4_1) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
} }
// Store dest back to memory // Store dest back to memory
MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
} }
} }
@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
} }
void JitShader::Compile_UniformCondition(Instruction instr) { void JitShader::Compile_UniformCondition(Instruction instr) {
int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id);
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0));
} }
BitSet32 JitShader::PersistentCallerSavedRegs() { BitSet32 JitShader::PersistentCallerSavedRegs() {
@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
looping = true; looping = true;
int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
SHR(32, R(LOOPCOUNT_REG), Imm8(8)); SHR(32, R(LOOPCOUNT_REG), Imm8(8));
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@ -826,8 +826,8 @@ void JitShader::Compile() {
// The stack pointer is 8 modulo 16 at the entry of a procedure // The stack pointer is 8 modulo 16 at the entry of a procedure
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
// Zero address/loop registers // Zero address/loop registers
XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@ -845,7 +845,7 @@ void JitShader::Compile() {
MOVAPS(NEGBIT, MatR(RAX)); MOVAPS(NEGBIT, MatR(RAX));
// Jump to start of the shader program // Jump to start of the shader program
JMPptr(R(ABI_PARAM2)); JMPptr(R(ABI_PARAM3));
// Compile entire program // Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));

View File

@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
public: public:
JitShader(); JitShader();
void Run(void* registers, unsigned offset) const { void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const {
program(registers, code_ptr[offset]); program(&setup, &state, code_ptr[offset]);
} }
void Compile(); void Compile();
@ -117,7 +117,7 @@ private:
/// Branches that need to be fixed up once the entire shader program is compiled /// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
using CompiledShader = void(void* registers, const u8* start_addr); using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr; CompiledShader* program = nullptr;
}; };