citra-emu
/
citra
Archived
1
0
Fork 0

externals: Update dynarmic to 6.6.1, Update oaknut to 2.0.1 (#7398)

This commit is contained in:
merry 2024-01-31 03:50:39 +00:00 committed by GitHub
parent 469f76b075
commit 63feac6bb3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 33 additions and 37 deletions

2
externals/dynarmic vendored

@ -1 +1 @@
Subproject commit d333a09b3b9152af3cb442902ae8ea18d8416470 Subproject commit ca0e264f4f962e29baa23a3282ce484625866b98

2
externals/oaknut vendored

@ -1 +1 @@
Subproject commit e6eecc3f9460728be0a8d3f63e66d31c0362f472 Subproject commit 9d091109deb445bc6e9289c6195a282b7c993d49

View File

@ -20,7 +20,7 @@ inline bool IsWithin128M(uintptr_t ref, uintptr_t target) {
} }
inline bool IsWithin128M(const oaknut::CodeGenerator& code, uintptr_t target) { inline bool IsWithin128M(const oaknut::CodeGenerator& code, uintptr_t target) {
return IsWithin128M(code.ptr<uintptr_t>(), target); return IsWithin128M(code.xptr<uintptr_t>(), target);
} }
template <typename T> template <typename T>

View File

@ -768,7 +768,7 @@ void JitShader::Compile_LOOP(Instruction instr) {
Label l_loop_start; Label l_loop_start;
l(l_loop_start); l(l_loop_start);
loop_break_labels.emplace_back(oaknut::Label()); loop_break_labels.emplace_back(Label());
Compile_Block(instr.flow_control.dest_offset + 1); Compile_Block(instr.flow_control.dest_offset + 1);
ADD(LOOPCOUNT_REG, LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component ADD(LOOPCOUNT_REG, LOOPCOUNT_REG, LOOPINC); // Increment LOOPCOUNT_REG by Z-component
SUBS(LOOPCOUNT, LOOPCOUNT, 1); // Decrement loop count by 1 SUBS(LOOPCOUNT, LOOPCOUNT, 1); // Decrement loop count by 1
@ -922,7 +922,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
swizzle_data = swizzle_data_; swizzle_data = swizzle_data_;
// Reset flow control state // Reset flow control state
program = (CompiledShader*)current_address(); program = xptr<CompiledShader*>();
program_counter = 0; program_counter = 0;
loop_depth = 0; loop_depth = 0;
instruction_labels.fill(Label()); instruction_labels.fill(Label());
@ -968,15 +968,13 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
protect(); protect();
invalidate_all(); invalidate_all();
const std::size_t code_size = const std::size_t code_size = static_cast<std::size_t>(offset());
current_address() - reinterpret_cast<uintptr_t>(oaknut::CodeBlock::ptr());
ASSERT_MSG(code_size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); ASSERT_MSG(code_size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!");
LOG_DEBUG(HW_GPU, "Compiled shader size={}", code_size); LOG_DEBUG(HW_GPU, "Compiled shader size={}", code_size);
} }
JitShader::JitShader() JitShader::JitShader() : CodeBlock(MAX_SHADER_SIZE), CodeGenerator(CodeBlock::ptr()) {
: oaknut::CodeBlock(MAX_SHADER_SIZE), oaknut::CodeGenerator(oaknut::CodeBlock::ptr()) {
unprotect(); unprotect();
CompilePrelude(); CompilePrelude();
} }
@ -986,8 +984,8 @@ void JitShader::CompilePrelude() {
exp2_subroutine = CompilePrelude_Exp2(); exp2_subroutine = CompilePrelude_Exp2();
} }
oaknut::Label JitShader::CompilePrelude_Log2() { Label JitShader::CompilePrelude_Log2() {
oaknut::Label subroutine; Label subroutine;
// We perform this approximation by first performing a range reduction into the range // We perform this approximation by first performing a range reduction into the range
// [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then // [1.0, 2.0). A minimax polynomial which was fit for the function log2(x) / (x - 1) is then
@ -995,44 +993,40 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
// range. Coefficients for the minimax polynomial. // range. Coefficients for the minimax polynomial.
// f(x) computes approximately log2(x) / (x - 1). // f(x) computes approximately log2(x) / (x - 1).
// f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0))). // f(x) = c4 + x * (c3 + x * (c2 + x * (c1 + x * c0))).
oaknut::Label c0;
align(16); align(16);
l(c0); const void* c0 = xptr<const void*>();
dw(0x3d74552f); dw(0x3d74552f);
align(16); align(16);
oaknut::Label c14; const void* c14 = xptr<const void*>();
l(c14);
dw(0xbeee7397); dw(0xbeee7397);
dw(0x3fbd96dd); dw(0x3fbd96dd);
dw(0xc02153f6); dw(0xc02153f6);
dw(0x4038d96c); dw(0x4038d96c);
align(16); align(16);
oaknut::Label negative_infinity_vector; const void* negative_infinity_vector = xptr<const void*>();
l(negative_infinity_vector);
dw(0xff800000); dw(0xff800000);
dw(0xff800000); dw(0xff800000);
dw(0xff800000); dw(0xff800000);
dw(0xff800000); dw(0xff800000);
oaknut::Label default_qnan_vector; const void* default_qnan_vector = xptr<const void*>();
l(default_qnan_vector);
dw(0x7fc00000); dw(0x7fc00000);
dw(0x7fc00000); dw(0x7fc00000);
dw(0x7fc00000); dw(0x7fc00000);
dw(0x7fc00000); dw(0x7fc00000);
oaknut::Label input_is_nan, input_is_zero, input_out_of_range; Label input_is_nan, input_is_zero, input_out_of_range;
align(16); align(16);
l(input_out_of_range); l(input_out_of_range);
B(Cond::EQ, input_is_zero); B(Cond::EQ, input_is_zero);
MOVP2R(XSCRATCH0, default_qnan_vector.ptr<void*>()); MOVP2R(XSCRATCH0, default_qnan_vector);
LDR(SRC1, XSCRATCH0); LDR(SRC1, XSCRATCH0);
RET(); RET();
l(input_is_zero); l(input_is_zero);
MOVP2R(XSCRATCH0, negative_infinity_vector.ptr<void*>()); MOVP2R(XSCRATCH0, negative_infinity_vector);
LDR(SRC1, XSCRATCH0); LDR(SRC1, XSCRATCH0);
RET(); RET();
@ -1064,14 +1058,14 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
UCVTF(VSCRATCH1.toS(), VSCRATCH1.toS()); UCVTF(VSCRATCH1.toS(), VSCRATCH1.toS());
// VSCRATCH1 now contains the exponent of the input. // VSCRATCH1 now contains the exponent of the input.
MOVP2R(XSCRATCH0, c0.ptr<void*>()); MOVP2R(XSCRATCH0, c0);
LDR(XSCRATCH0.toW(), XSCRATCH0); LDR(XSCRATCH0.toW(), XSCRATCH0);
MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW()); MOV(VSCRATCH0.Selem()[0], XSCRATCH0.toW());
// Complete computation of polynomial // Complete computation of polynomial
// Load C1,C2,C3,C4 into a single scratch register // Load C1,C2,C3,C4 into a single scratch register
const QReg C14 = SRC2; const QReg C14 = SRC2;
MOVP2R(XSCRATCH0, c14.ptr<void*>()); MOVP2R(XSCRATCH0, c14);
LDR(C14, XSCRATCH0); LDR(C14, XSCRATCH0);
FMUL(VSCRATCH0.toS(), VSCRATCH0.toS(), SRC1.toS()); FMUL(VSCRATCH0.toS(), VSCRATCH0.toS(), SRC1.toS());
FMLA(VSCRATCH0.toS(), ONE.toS(), C14.Selem()[0]); FMLA(VSCRATCH0.toS(), ONE.toS(), C14.Selem()[0]);
@ -1097,32 +1091,32 @@ oaknut::Label JitShader::CompilePrelude_Log2() {
return subroutine; return subroutine;
} }
oaknut::Label JitShader::CompilePrelude_Exp2() { Label JitShader::CompilePrelude_Exp2() {
oaknut::Label subroutine; Label subroutine;
// This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax // This approximation first performs a range reduction into the range [-0.5, 0.5). A minimax
// polynomial which was fit for the function exp2(x) is then evaluated. We then restore the // polynomial which was fit for the function exp2(x) is then evaluated. We then restore the
// result into the appropriate range. // result into the appropriate range.
align(16); align(16);
const void* input_max = (const void*)current_address(); const void* input_max = xptr<const void*>();
dw(0x43010000); dw(0x43010000);
const void* input_min = (const void*)current_address(); const void* input_min = xptr<const void*>();
dw(0xc2fdffff); dw(0xc2fdffff);
const void* c0 = (const void*)current_address(); const void* c0 = xptr<const void*>();
dw(0x3c5dbe69); dw(0x3c5dbe69);
const void* half = (const void*)current_address(); const void* half = xptr<const void*>();
dw(0x3f000000); dw(0x3f000000);
const void* c1 = (const void*)current_address(); const void* c1 = xptr<const void*>();
dw(0x3d5509f9); dw(0x3d5509f9);
const void* c2 = (const void*)current_address(); const void* c2 = xptr<const void*>();
dw(0x3e773cc5); dw(0x3e773cc5);
const void* c3 = (const void*)current_address(); const void* c3 = xptr<const void*>();
dw(0x3f3168b3); dw(0x3f3168b3);
const void* c4 = (const void*)current_address(); const void* c4 = xptr<const void*>();
dw(0x3f800016); dw(0x3f800016);
oaknut::Label ret_label; Label ret_label;
align(16); align(16);
l(subroutine); l(subroutine);

View File

@ -37,12 +37,14 @@ constexpr std::size_t MAX_SHADER_SIZE = MAX_PROGRAM_CODE_LENGTH * 256;
* This class implements the shader JIT compiler. It recompiles a Pica shader program into AArch64 * This class implements the shader JIT compiler. It recompiles a Pica shader program into AArch64
* code that can be executed on the host machine directly. * code that can be executed on the host machine directly.
*/ */
class JitShader : private oaknut::CodeBlock, public oaknut::CodeGenerator { class JitShader : private oaknut::CodeBlock, private oaknut::CodeGenerator {
public: public:
JitShader(); JitShader();
void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const { void Run(const ShaderSetup& setup, ShaderUnit& state, u32 offset) const {
program(&setup.uniforms, &state, instruction_labels[offset].ptr<const std::byte*>()); program(&setup.uniforms, &state,
reinterpret_cast<std::byte*>(oaknut::CodeBlock::ptr()) +
instruction_labels[offset].offset());
} }
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code, void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,