|
|
@ -768,7 +768,7 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
|
|
|
Label l_loop_start;
|
|
|
|
Label l_loop_start;
|
|
|
|
l(l_loop_start);
|
|
|
|
l(l_loop_start);
|
|
|
|
|
|
|
|
|
|
|
|
loop_break_labels.emplace_back(oaknut::Label());
|
|
|
|
loop_break_labels.emplace_back(Label());
|
|
|
|
Compile_Block(instr.flow_control.dest_offset + 1);
|
|
|
|
Compile_Block(instr.flow_control.dest_offset + 1);
|
|
|
|
ADD(LOOPCOUNT_REG, LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
|
|
|
|
ADD(LOOPCOUNT_REG, LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
|
|
|
|
SUBS(LOOPCOUNT, LOOPCOUNT, 1); // Increment loop count by 1
|
|
|
|
SUBS(LOOPCOUNT, LOOPCOUNT, 1); // Increment loop count by 1
|
|
|
@ -922,7 +922,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
|
|
|
swizzle_data = swizzle_data_;
|
|
|
|
swizzle_data = swizzle_data_;
|
|
|
|
|
|
|
|
|
|
|
|
// Reset flow control state
|
|
|
|
// Reset flow control state
|
|
|
|
program = (CompiledShader*)current_address();
|
|
|
|
program = xptr<CompiledShader*>();
|
|
|
|
program_counter = 0;
|
|
|
|
program_counter = 0;
|
|
|
|
loop_depth = 0;
|
|
|
|
loop_depth = 0;
|
|
|
|
instruction_labels.fill(Label());
|
|
|
|
instruction_labels.fill(Label());
|
|
|
@ -968,15 +968,13 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
|
|
|
protect();
|
|
|
|
protect();
|
|
|
|
invalidate_all();
|
|
|
|
invalidate_all();
|
|
|
|
|
|
|
|
|
|
|
|
const std::size_t code_size =
|
|
|
|
const std::size_t code_size = static_cast<std::size_t>(offset());
|
|
|
|
current_address() - reinterpret_cast<uintptr_t>(oaknut::CodeBlock::ptr());
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
ASSERT_MSG(code_size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
|
|
|
ASSERT_MSG(code_size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
|
|
|
|
LOG_DEBUG(HW_GPU, "Compiled shader size={}", code_size);
|
|
|
|
LOG_DEBUG(HW_GPU, "Compiled shader size={}", code_size);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
JitShader::JitShader()
|
|
|
|
JitShader::JitShader() : CodeBlock(MAX_SHADER_SIZE), CodeGenerator(CodeBlock::ptr()) {
|
|
|
|
: oaknut::CodeBlock(MAX_SHADER_SIZE), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {
|
|
|
|
|
|
|
|
unprotect();
|
|
|
|
unprotect();
|
|
|
|
CompilePrelude();
|
|
|
|
CompilePrelude();
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -986,8 +984,8 @@ void JitShader::CompilePrelude() {
|
|
|
|
exp2_subroutine = CompilePrelude_Exp2();
|
|
|
|
exp2_subroutine = CompilePrelude_Exp2();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
oaknut::Label JitShader::CompilePrelude_Log2() {
|
|
|
|
Label JitShader::CompilePrelude_Log2() {
|
|
|
|
oaknut::Label subroutine;
|
|
|
|
Label subroutine;
|
|
|
|
|
|
|
|
|
|
|
|
// We perform this approximation by first performing a range reduction into the range
|
|
|
|
// We perform this approximation by first performing a range reduction into the range
|
|
|
|
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
|
|
|
|
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
|
|
|
@ -995,44 +993,40 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
|
|
|
|
// range. Coefficients for the minimax polynomial.
|
|
|
|
// range. Coefficients for the minimax polynomial.
|
|
|
|
// f(x) computes approximately log2(x) / (x - 1).
|
|
|
|
// f(x) computes approximately log2(x) / (x - 1).
|
|
|
|
// f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0)).
|
|
|
|
// f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0)).
|
|
|
|
oaknut::Label c0;
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
l(c0);
|
|
|
|
const void* c0 = xptr<const void*>();
|
|
|
|
dw(0x3d74552f);
|
|
|
|
dw(0x3d74552f);
|
|
|
|
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
oaknut::Label c14;
|
|
|
|
const void* c14 = xptr<const void*>();
|
|
|
|
l(c14);
|
|
|
|
|
|
|
|
dw(0xbeee7397);
|
|
|
|
dw(0xbeee7397);
|
|
|
|
dw(0x3fbd96dd);
|
|
|
|
dw(0x3fbd96dd);
|
|
|
|
dw(0xc02153f6);
|
|
|
|
dw(0xc02153f6);
|
|
|
|
dw(0x4038d96c);
|
|
|
|
dw(0x4038d96c);
|
|
|
|
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
oaknut::Label negative_infinity_vector;
|
|
|
|
const void* negative_infinity_vector = xptr<const void*>();
|
|
|
|
l(negative_infinity_vector);
|
|
|
|
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
dw(0xff800000);
|
|
|
|
oaknut::Label default_qnan_vector;
|
|
|
|
const void* default_qnan_vector = xptr<const void*>();
|
|
|
|
l(default_qnan_vector);
|
|
|
|
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
dw(0x7fc00000);
|
|
|
|
|
|
|
|
|
|
|
|
oaknut::Label input_is_nan, input_is_zero, input_out_of_range;
|
|
|
|
Label input_is_nan, input_is_zero, input_out_of_range;
|
|
|
|
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
l(input_out_of_range);
|
|
|
|
l(input_out_of_range);
|
|
|
|
B(Cond::EQ, input_is_zero);
|
|
|
|
B(Cond::EQ, input_is_zero);
|
|
|
|
MOVP2R(XSCRATCH0, default_qnan_vector.ptr<void*>());
|
|
|
|
MOVP2R(XSCRATCH0, default_qnan_vector);
|
|
|
|
LDR(SRC1, XSCRATCH0);
|
|
|
|
LDR(SRC1, XSCRATCH0);
|
|
|
|
RET();
|
|
|
|
RET();
|
|
|
|
|
|
|
|
|
|
|
|
l(input_is_zero);
|
|
|
|
l(input_is_zero);
|
|
|
|
MOVP2R(XSCRATCH0, negative_infinity_vector.ptr<void*>());
|
|
|
|
MOVP2R(XSCRATCH0, negative_infinity_vector);
|
|
|
|
LDR(SRC1, XSCRATCH0);
|
|
|
|
LDR(SRC1, XSCRATCH0);
|
|
|
|
RET();
|
|
|
|
RET();
|
|
|
|
|
|
|
|
|
|
|
@ -1064,14 +1058,14 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
|
|
|
|
UCVTF(VSCRATCH1.toS(), VSCRATCH1.toS());
|
|
|
|
UCVTF(VSCRATCH1.toS(), VSCRATCH1.toS());
|
|
|
|
// VSCRATCH1 now contains the exponent of the input.
|
|
|
|
// VSCRATCH1 now contains the exponent of the input.
|
|
|
|
|
|
|
|
|
|
|
|
MOVP2R(XSCRATCH0, c0.ptr<void*>());
|
|
|
|
MOVP2R(XSCRATCH0, c0);
|
|
|
|
LDR(XSCRATCH0.toW(), XSCRATCH0);
|
|
|
|
LDR(XSCRATCH0.toW(), XSCRATCH0);
|
|
|
|
MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW());
|
|
|
|
MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW());
|
|
|
|
|
|
|
|
|
|
|
|
// Complete computation of polynomial
|
|
|
|
// Complete computation of polynomial
|
|
|
|
// Load C1,C2,C3,C4 into a single scratch register
|
|
|
|
// Load C1,C2,C3,C4 into a single scratch register
|
|
|
|
const QReg C14 = SRC2;
|
|
|
|
const QReg C14 = SRC2;
|
|
|
|
MOVP2R(XSCRATCH0, c14.ptr<void*>());
|
|
|
|
MOVP2R(XSCRATCH0, c14);
|
|
|
|
LDR(C14, XSCRATCH0);
|
|
|
|
LDR(C14, XSCRATCH0);
|
|
|
|
FMUL(VSCRATCH0.toS(), VSCRATCH0.toS(), SRC1.toS());
|
|
|
|
FMUL(VSCRATCH0.toS(), VSCRATCH0.toS(), SRC1.toS());
|
|
|
|
FMLA(VSCRATCH0.toS(), ONE.toS(), C14.Selem()[0]);
|
|
|
|
FMLA(VSCRATCH0.toS(), ONE.toS(), C14.Selem()[0]);
|
|
|
@ -1097,32 +1091,32 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
|
|
|
|
return subroutine;
|
|
|
|
return subroutine;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
oaknut::Label JitShader::CompilePrelude_Exp2() {
|
|
|
|
Label JitShader::CompilePrelude_Exp2() {
|
|
|
|
oaknut::Label subroutine;
|
|
|
|
Label subroutine;
|
|
|
|
|
|
|
|
|
|
|
|
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minmax
|
|
|
|
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minmax
|
|
|
|
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
|
|
|
|
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
|
|
|
|
// result into the appropriate range.
|
|
|
|
// result into the appropriate range.
|
|
|
|
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
const void* input_max = (const void*)current_address();
|
|
|
|
const void* input_max = xptr<const void*>();
|
|
|
|
dw(0x43010000);
|
|
|
|
dw(0x43010000);
|
|
|
|
const void* input_min = (const void*)current_address();
|
|
|
|
const void* input_min = xptr<const void*>();
|
|
|
|
dw(0xc2fdffff);
|
|
|
|
dw(0xc2fdffff);
|
|
|
|
const void* c0 = (const void*)current_address();
|
|
|
|
const void* c0 = xptr<const void*>();
|
|
|
|
dw(0x3c5dbe69);
|
|
|
|
dw(0x3c5dbe69);
|
|
|
|
const void* half = (const void*)current_address();
|
|
|
|
const void* half = xptr<const void*>();
|
|
|
|
dw(0x3f000000);
|
|
|
|
dw(0x3f000000);
|
|
|
|
const void* c1 = (const void*)current_address();
|
|
|
|
const void* c1 = xptr<const void*>();
|
|
|
|
dw(0x3d5509f9);
|
|
|
|
dw(0x3d5509f9);
|
|
|
|
const void* c2 = (const void*)current_address();
|
|
|
|
const void* c2 = xptr<const void*>();
|
|
|
|
dw(0x3e773cc5);
|
|
|
|
dw(0x3e773cc5);
|
|
|
|
const void* c3 = (const void*)current_address();
|
|
|
|
const void* c3 = xptr<const void*>();
|
|
|
|
dw(0x3f3168b3);
|
|
|
|
dw(0x3f3168b3);
|
|
|
|
const void* c4 = (const void*)current_address();
|
|
|
|
const void* c4 = xptr<const void*>();
|
|
|
|
dw(0x3f800016);
|
|
|
|
dw(0x3f800016);
|
|
|
|
|
|
|
|
|
|
|
|
oaknut::Label ret_label;
|
|
|
|
Label ret_label;
|
|
|
|
|
|
|
|
|
|
|
|
align(16);
|
|
|
|
align(16);
|
|
|
|
l(subroutine);
|
|
|
|
l(subroutine);
|
|
|
|