From 27fb97377eeb40849260ea866a90519521c6f59b Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Sun, 21 Mar 2021 04:33:19 +0100 Subject: [PATCH] shader: Fix floating point comparison for FP16 --- .../maxwell/translate/impl/common_funcs.cpp | 2 +- .../maxwell/translate/impl/common_funcs.h | 4 +-- .../impl/half_floating_point_set.cpp | 34 +++++++++---------- .../half_floating_point_set_predicate.cpp | 20 +++++------ .../ir_opt/lower_fp16_to_fp32.cpp | 24 +++++++++++++ 5 files changed, 54 insertions(+), 30 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index af9a8f82c..d30e82b10 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -72,7 +72,7 @@ bool IsCompareOpOrdered(FPCompareOp op) { } } -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control) { const bool ordered{IsCompareOpOrdered(compare_op)}; switch (compare_op) { diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index f8add3c34..f584060b3 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -18,7 +18,7 @@ namespace Shader::Maxwell { [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); -[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, - const IR::F32& operand_2, FPCompareOp compare_op, +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F16F32F64& operand_1, + const IR::F16F32F64& operand_2, FPCompareOp compare_op, IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp index 4825ca06a..1d28c0531 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set.cpp @@ -22,8 +22,8 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hset2.src_a_reg), hset2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - //if (lhs_a.Type() != lhs_b.Type()) { + + if (lhs_a.Type() != lhs_b.Type()) { if (lhs_a.Type() == IR::Type::F16) { lhs_a = v.ir.FPConvert(32, lhs_a); rhs_a = v.ir.FPConvert(32, rhs_a); @@ -32,7 +32,7 @@ void HSET2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool bf, bool f lhs_b = v.ir.FPConvert(32, lhs_b); rhs_b = v.ir.FPConvert(32, rhs_b); } - //} + } lhs_a = v.ir.FPAbsNeg(lhs_a, hset2.abs_a != 0, hset2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hset2.abs_a != 0, hset2.neg_a != 0); @@ -94,22 +94,22 @@ void TranslatorVisitor::HSET2_cbuf(u64 insn) { } void TranslatorVisitor::HSET2_imm(u64 insn) { - union { - u64 insn; - BitField<53, 1, u64> bf; - BitField<54, 1, u64> ftz; - BitField<49, 4, FPCompareOp> compare_op; - BitField<56, 1, u64> neg_high; - BitField<30, 9, u64> high; - BitField<29, 1, u64> neg_low; - BitField<20, 9, u64> low; - } const hset2{insn}; + union { + u64 insn; + BitField<53, 1, u64> bf; + BitField<54, 1, u64> ftz; + BitField<49, 4, FPCompareOp> compare_op; + BitField<56, 1, u64> neg_high; + BitField<30, 9, u64> high; + BitField<29, 1, u64> neg_low; + BitField<20, 9, u64> low; + } const hset2{insn}; - const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | - static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; + const u32 imm{static_cast(hset2.low << 6) | ((hset2.neg_low != 0 ? 1 : 0) << 15) | + static_cast(hset2.high << 22) | ((hset2.neg_high != 0 ? 1 : 0) << 31)}; - HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, - hset2.compare_op, Swizzle::H1_H0); + HSET2(*this, insn, ir.Imm32(imm), hset2.bf != 0, hset2.ftz != 0, false, false, hset2.compare_op, + Swizzle::H1_H0); } } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp index 6b1ac21d5..3e2a23c92 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/half_floating_point_set_predicate.cpp @@ -24,17 +24,17 @@ void HSETP2(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, bool neg_b, bo auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hsetp2.src_a_reg), hsetp2.swizzle_a)}; auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)}; - // TODO: Implement FP16 FloatingPointCompare - // if (lhs_a.Type() != lhs_b.Type()) { - if (lhs_a.Type() == IR::Type::F16) { - lhs_a = v.ir.FPConvert(32, lhs_a); - rhs_a = v.ir.FPConvert(32, rhs_a); + + if (lhs_a.Type() != lhs_b.Type()) { + if (lhs_a.Type() == IR::Type::F16) { + lhs_a = v.ir.FPConvert(32, lhs_a); + rhs_a = v.ir.FPConvert(32, rhs_a); + } + if (lhs_b.Type() == IR::Type::F16) { + lhs_b = v.ir.FPConvert(32, lhs_b); + rhs_b = v.ir.FPConvert(32, rhs_b); + } } - if (lhs_b.Type() == IR::Type::F16) { - lhs_b = v.ir.FPConvert(32, lhs_b); - rhs_b = v.ir.FPConvert(32, rhs_b); - } - //} lhs_a = v.ir.FPAbsNeg(lhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); rhs_a = v.ir.FPAbsNeg(rhs_a, hsetp2.abs_a != 0, hsetp2.neg_a != 0); diff --git a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp index baa3d22df..7723c9a57 100644 --- a/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp +++ b/src/shader_recompiler/ir_opt/lower_fp16_to_fp32.cpp @@ -50,6 +50,30 @@ IR::Opcode Replace(IR::Opcode op) { return IR::Opcode::CompositeInsertF32x3; case IR::Opcode::CompositeInsertF16x4: return IR::Opcode::CompositeInsertF32x4; + case IR::Opcode::FPOrdEqual16: + return IR::Opcode::FPOrdEqual32; + case IR::Opcode::FPUnordEqual16: + return IR::Opcode::FPUnordEqual32; + case IR::Opcode::FPOrdNotEqual16: + return IR::Opcode::FPOrdNotEqual32; + case IR::Opcode::FPUnordNotEqual16: + return IR::Opcode::FPUnordNotEqual32; + case IR::Opcode::FPOrdLessThan16: + return IR::Opcode::FPOrdLessThan32; + case IR::Opcode::FPUnordLessThan16: + return IR::Opcode::FPUnordLessThan32; + case IR::Opcode::FPOrdGreaterThan16: + return IR::Opcode::FPOrdGreaterThan32; + case IR::Opcode::FPUnordGreaterThan16: + return IR::Opcode::FPUnordGreaterThan32; + case IR::Opcode::FPOrdLessThanEqual16: + return IR::Opcode::FPOrdLessThanEqual32; + case IR::Opcode::FPUnordLessThanEqual16: + return IR::Opcode::FPUnordLessThanEqual32; + case IR::Opcode::FPOrdGreaterThanEqual16: + return IR::Opcode::FPOrdGreaterThanEqual32; + case IR::Opcode::FPUnordGreaterThanEqual16: + return IR::Opcode::FPUnordGreaterThanEqual32; case IR::Opcode::ConvertS16F16: return IR::Opcode::ConvertS16F32; case IR::Opcode::ConvertS32F16: