From fa2f6e38f4d465ba6e5efe6c6bd23d8ef39b080d Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 16 Mar 2021 00:57:07 -0400 Subject: [PATCH] shader: Implement FSET and FSETP Also fix oversight with adding SignedZeroInfNanPreserve execution mode. --- src/shader_recompiler/CMakeLists.txt | 2 + .../backend/spirv/emit_spirv.cpp | 6 +- .../maxwell/translate/impl/common_funcs.cpp | 48 +++++++++++++ .../maxwell/translate/impl/common_funcs.h | 6 ++ .../translate/impl/floating_point_compare.cpp | 68 ------------------- .../impl/floating_point_compare_and_set.cpp | 65 ++++++++++++++++++ .../impl/floating_point_set_predicate.cpp | 60 ++++++++++++++++ .../frontend/maxwell/translate/impl/impl.h | 19 ++++++ .../translate/impl/not_implemented.cpp | 24 ------- 9 files changed, 204 insertions(+), 94 deletions(-) create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp create mode 100644 src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp diff --git a/src/shader_recompiler/CMakeLists.txt b/src/shader_recompiler/CMakeLists.txt index 97e9b4c8e..6d2e804ca 100644 --- a/src/shader_recompiler/CMakeLists.txt +++ b/src/shader_recompiler/CMakeLists.txt @@ -66,12 +66,14 @@ add_library(shader_recompiler STATIC frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp + frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_min_max.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp + frontend/maxwell/translate/impl/floating_point_set_predicate.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.h diff --git a/src/shader_recompiler/backend/spirv/emit_spirv.cpp b/src/shader_recompiler/backend/spirv/emit_spirv.cpp index 7e7db9161..50c0f7243 100644 --- a/src/shader_recompiler/backend/spirv/emit_spirv.cpp +++ b/src/shader_recompiler/backend/spirv/emit_spirv.cpp @@ -124,10 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit ctx.AddExtension("SPV_KHR_float_controls"); if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); } if (profile.support_fp32_signed_zero_nan_preserve) { - ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); + ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); + ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); } if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp index 9d4ac2e36..af9a8f82c 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.cpp @@ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp } } +bool IsCompareOpOrdered(FPCompareOp op) { + switch (op) { + case FPCompareOp::LTU: + case FPCompareOp::EQU: + case FPCompareOp::LEU: + case FPCompareOp::GTU: + case FPCompareOp::NEU: + case FPCompareOp::GEU: + return false; + default: + return true; + } +} + +IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, + FPCompareOp compare_op, IR::FpControl control) { + const bool ordered{IsCompareOpOrdered(compare_op)}; + switch (compare_op) { + case FPCompareOp::F: + return ir.Imm1(false); + case FPCompareOp::LT: + case FPCompareOp::LTU: + return ir.FPLessThan(operand_1, operand_2, control, ordered); + case FPCompareOp::EQ: + case FPCompareOp::EQU: + return ir.FPEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::LE: + case FPCompareOp::LEU: + return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GT: + case FPCompareOp::GTU: + return ir.FPGreaterThan(operand_1, operand_2, control, ordered); + case FPCompareOp::NE: + case FPCompareOp::NEU: + return ir.FPNotEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::GE: + case FPCompareOp::GEU: + return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); + case FPCompareOp::NUM: + return ir.FPOrdered(operand_1, operand_2); + case FPCompareOp::Nan: + return ir.FPUnordered(operand_1, operand_2); + case FPCompareOp::T: + return ir.Imm1(true); + default: + throw NotImplementedException("Invalid FP compare op {}", compare_op); + } +} } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h index c9ae5c500..f8add3c34 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h @@ -15,4 +15,10 @@ namespace Shader::Maxwell { const IR::U1& predicate_2, BooleanOp bop); [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); + +[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); + +[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, + const IR::F32& operand_2, FPCompareOp compare_op, + IR::FpControl control = {}); } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp index f254ecb3a..e78e9c4e1 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare.cpp @@ -9,74 +9,6 @@ namespace Shader::Maxwell { namespace { -enum class FPCompareOp : u64 { - F, - LT, - EQ, - LE, - GT, - NE, - GE, - NUM, - Nan, - LTU, - EQU, - LEU, - GTU, - NEU, - GEU, - T, -}; - -bool IsCompareOpOrdered(FPCompareOp op) { - switch (op) { - case FPCompareOp::LTU: - case FPCompareOp::EQU: - case FPCompareOp::LEU: - case FPCompareOp::GTU: - case FPCompareOp::NEU: - case FPCompareOp::GEU: - return false; - default: - return true; - } -} - -IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, - FPCompareOp compare_op, IR::FpControl control) { - const bool ordered{IsCompareOpOrdered(compare_op)}; - switch (compare_op) { - case FPCompareOp::F: - return ir.Imm1(false); - case FPCompareOp::LT: - case FPCompareOp::LTU: - return ir.FPLessThan(operand_1, operand_2, control, ordered); - case FPCompareOp::EQ: - case FPCompareOp::EQU: - return ir.FPEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::LE: - case FPCompareOp::LEU: - return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GT: - case FPCompareOp::GTU: - return ir.FPGreaterThan(operand_1, operand_2, control, ordered); - case FPCompareOp::NE: - case FPCompareOp::NEU: - return ir.FPNotEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::GE: - case FPCompareOp::GEU: - return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); - case FPCompareOp::NUM: - return ir.FPOrdered(operand_1, operand_2); - case FPCompareOp::Nan: - return ir.FPUnordered(operand_1, operand_2); - case FPCompareOp::T: - return ir.Imm1(true); - default: - throw NotImplementedException("Invalid compare op {}", compare_op); - } -} - void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { union { u64 insn; diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp new file mode 100644 index 000000000..c5417775e --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp @@ -0,0 +1,65 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 8, IR::Reg> dest_reg; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> pred; + BitField<42, 1, u64> neg_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<48, 4, FPCompareOp> compare_op; + BitField<52, 1, u64> bf; + BitField<53, 1, u64> negate_b; + BitField<54, 1, u64> abs_a; + BitField<55, 1, u64> ftz; + } const fset{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + IR::U1 pred{v.ir.GetPred(fset.pred)}; + if (fset.neg_pred != 0) { + pred = v.ir.LogicalNot(pred); + } + const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; + const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; + + const IR::U32 one_mask{v.ir.Imm32(-1)}; + const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; + const IR::U32 fail_result{v.ir.Imm32(0)}; + const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; + + v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); +} +} // Anonymous namespace + +void TranslatorVisitor::FSET_reg(u64 insn) { + FSET(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSET_cbuf(u64 insn) { + FSET(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSET_imm(u64 insn) { + FSET(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp new file mode 100644 index 000000000..8ff9db843 --- /dev/null +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_set_predicate.cpp @@ -0,0 +1,60 @@ +// Copyright 2021 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/bit_field.h" +#include "common/common_types.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" +#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" + +namespace Shader::Maxwell { +namespace { +void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { + union { + u64 insn; + BitField<0, 3, IR::Pred> dest_pred_b; + BitField<3, 3, IR::Pred> dest_pred_a; + BitField<6, 1, u64> negate_b; + BitField<7, 1, u64> abs_a; + BitField<8, 8, IR::Reg> src_a_reg; + BitField<39, 3, IR::Pred> bop_pred; + BitField<42, 1, u64> neg_bop_pred; + BitField<43, 1, u64> negate_a; + BitField<44, 1, u64> abs_b; + BitField<45, 2, BooleanOp> bop; + BitField<47, 1, u64> ftz; + BitField<48, 4, FPCompareOp> compare_op; + } const fsetp{insn}; + + const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; + const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); + const IR::FpControl control{ + .no_contraction{false}, + .rounding{IR::FpRounding::DontCare}, + .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, + }; + + const BooleanOp bop{fsetp.bop}; + const FPCompareOp compare_op{fsetp.compare_op}; + const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; + const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; + const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; + const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; + v.ir.SetPred(fsetp.dest_pred_a, result_a); + v.ir.SetPred(fsetp.dest_pred_b, result_b); +} +} // Anonymous namespace + +void TranslatorVisitor::FSETP_reg(u64 insn) { + FSETP(*this, insn, GetFloatReg20(insn)); +} + +void TranslatorVisitor::FSETP_cbuf(u64 insn) { + FSETP(*this, insn, GetFloatCbuf(insn)); +} + +void TranslatorVisitor::FSETP_imm(u64 insn) { + FSETP(*this, insn, GetFloatImm20(insn)); +} + +} // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 45d6f5e06..761b64666 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -35,6 +35,25 @@ enum class PredicateOp : u64 { NonZero, }; +enum class FPCompareOp : u64 { + F, + LT, + EQ, + LE, + GT, + NE, + GE, + NUM, + Nan, + LTU, + EQU, + LEU, + GTU, + NEU, + GEU, + T, +}; + class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp index 9aa7b836c..b31928370 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp @@ -201,30 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { ThrowNotImplemented(Opcode::FCHK_imm); } -void TranslatorVisitor::FSET_reg(u64) { - ThrowNotImplemented(Opcode::FSET_reg); -} - -void TranslatorVisitor::FSET_cbuf(u64) { - ThrowNotImplemented(Opcode::FSET_cbuf); -} - -void TranslatorVisitor::FSET_imm(u64) { - ThrowNotImplemented(Opcode::FSET_imm); -} - -void TranslatorVisitor::FSETP_reg(u64) { - ThrowNotImplemented(Opcode::FSETP_reg); -} - -void TranslatorVisitor::FSETP_cbuf(u64) { - ThrowNotImplemented(Opcode::FSETP_cbuf); -} - -void TranslatorVisitor::FSETP_imm(u64) { - ThrowNotImplemented(Opcode::FSETP_imm); -} - void TranslatorVisitor::FSWZADD(u64) { ThrowNotImplemented(Opcode::FSWZADD); }