yuzu-emu
/
yuzu
Archived
1
0
Fork 0

shader: Initial instruction support

This commit is contained in:
ReinUsesLisp 2021-02-03 16:43:04 -03:00 committed by ameerj
parent 6c4cc0cd06
commit d24a16045f
28 changed files with 1452 additions and 336 deletions

View File

@ -39,18 +39,27 @@ add_executable(shader_recompiler
frontend/maxwell/program.h frontend/maxwell/program.h
frontend/maxwell/termination_code.cpp frontend/maxwell/termination_code.cpp
frontend/maxwell/termination_code.h frontend/maxwell/termination_code.h
frontend/maxwell/translate/impl/common_encoding.h
frontend/maxwell/translate/impl/floating_point_add.cpp
frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp
frontend/maxwell/translate/impl/floating_point_multiply.cpp
frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.cpp
frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/impl.h
frontend/maxwell/translate/impl/integer_add.cpp
frontend/maxwell/translate/impl/integer_scaled_add.cpp
frontend/maxwell/translate/impl/integer_set_predicate.cpp
frontend/maxwell/translate/impl/integer_shift_left.cpp
frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
frontend/maxwell/translate/impl/load_store_attribute.cpp frontend/maxwell/translate/impl/load_store_attribute.cpp
frontend/maxwell/translate/impl/load_store_memory.cpp frontend/maxwell/translate/impl/load_store_memory.cpp
frontend/maxwell/translate/impl/not_implemented.cpp frontend/maxwell/translate/impl/not_implemented.cpp
frontend/maxwell/translate/impl/register_move.cpp frontend/maxwell/translate/impl/move_register.cpp
frontend/maxwell/translate/impl/move_special_register.cpp
frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.cpp
frontend/maxwell/translate/translate.h frontend/maxwell/translate/translate.h
ir_opt/dead_code_elimination_pass.cpp ir_opt/dead_code_elimination_pass.cpp
ir_opt/get_set_elimination_pass.cpp
ir_opt/identity_removal_pass.cpp ir_opt/identity_removal_pass.cpp
ir_opt/passes.h ir_opt/passes.h
ir_opt/ssa_rewrite_pass.cpp ir_opt/ssa_rewrite_pass.cpp

View File

@ -23,8 +23,8 @@ void Block::AppendNewInst(Opcode op, std::initializer_list<Value> args) {
} }
Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op, Block::iterator Block::PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args) { std::initializer_list<Value> args, u64 flags) {
Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op)}; Inst* const inst{std::construct_at(instruction_alloc_pool.allocate(), op, flags)};
const auto result_it{instructions.insert(insertion_point, *inst)}; const auto result_it{instructions.insert(insertion_point, *inst)};
if (inst->NumArgs() != args.size()) { if (inst->NumArgs() != args.size()) {

View File

@ -39,7 +39,7 @@ public:
/// Prepends a new instruction to this basic block before the insertion point. /// Prepends a new instruction to this basic block before the insertion point.
iterator PrependNewInst(iterator insertion_point, Opcode op, iterator PrependNewInst(iterator insertion_point, Opcode op,
std::initializer_list<Value> args = {}); std::initializer_list<Value> args = {}, u64 flags = 0);
/// Adds a new immediate predecessor to the basic block. /// Adds a new immediate predecessor to the basic block.
void AddImmediatePredecessor(IR::Block* immediate_predecessor); void AddImmediatePredecessor(IR::Block* immediate_predecessor);

View File

@ -129,6 +129,58 @@ void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) {
Inst(Opcode::SetAttribute, attribute, value); Inst(Opcode::SetAttribute, attribute, value);
} }
/// Emits a WorkgroupIdX instruction and returns its U32 result.
U32 IREmitter::WorkgroupIdX() {
    constexpr Opcode opcode{Opcode::WorkgroupIdX};
    return Inst<U32>(opcode);
}

/// Emits a WorkgroupIdY instruction and returns its U32 result.
U32 IREmitter::WorkgroupIdY() {
    constexpr Opcode opcode{Opcode::WorkgroupIdY};
    return Inst<U32>(opcode);
}

/// Emits a WorkgroupIdZ instruction and returns its U32 result.
U32 IREmitter::WorkgroupIdZ() {
    constexpr Opcode opcode{Opcode::WorkgroupIdZ};
    return Inst<U32>(opcode);
}

/// Emits a LocalInvocationIdX instruction and returns its U32 result.
U32 IREmitter::LocalInvocationIdX() {
    constexpr Opcode opcode{Opcode::LocalInvocationIdX};
    return Inst<U32>(opcode);
}

/// Emits a LocalInvocationIdY instruction and returns its U32 result.
U32 IREmitter::LocalInvocationIdY() {
    constexpr Opcode opcode{Opcode::LocalInvocationIdY};
    return Inst<U32>(opcode);
}

/// Emits a LocalInvocationIdZ instruction and returns its U32 result.
U32 IREmitter::LocalInvocationIdZ() {
    constexpr Opcode opcode{Opcode::LocalInvocationIdZ};
    return Inst<U32>(opcode);
}
/// Emits a LoadGlobalU8 instruction on address and returns its U32 result.
U32 IREmitter::LoadGlobalU8(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobalU8};
    return Inst<U32>(opcode, address);
}

/// Emits a LoadGlobalS8 instruction on address and returns its U32 result.
U32 IREmitter::LoadGlobalS8(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobalS8};
    return Inst<U32>(opcode, address);
}

/// Emits a LoadGlobalU16 instruction on address and returns its U32 result.
U32 IREmitter::LoadGlobalU16(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobalU16};
    return Inst<U32>(opcode, address);
}

/// Emits a LoadGlobalS16 instruction on address and returns its U32 result.
U32 IREmitter::LoadGlobalS16(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobalS16};
    return Inst<U32>(opcode, address);
}

/// Emits a LoadGlobal32 instruction on address and returns its U32 result.
U32 IREmitter::LoadGlobal32(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobal32};
    return Inst<U32>(opcode, address);
}

/// Emits a LoadGlobal64 instruction on address; the result is returned as an untyped Value.
Value IREmitter::LoadGlobal64(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobal64};
    return Inst<Value>(opcode, address);
}

/// Emits a LoadGlobal128 instruction on address; the result is returned as an untyped Value.
Value IREmitter::LoadGlobal128(const U64& address) {
    constexpr Opcode opcode{Opcode::LoadGlobal128};
    return Inst<Value>(opcode, address);
}
void IREmitter::WriteGlobalU8(const U64& address, const U32& value) { void IREmitter::WriteGlobalU8(const U64& address, const U32& value) {
Inst(Opcode::WriteGlobalU8, address, value); Inst(Opcode::WriteGlobalU8, address, value);
} }
@ -173,17 +225,17 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
return Inst<U1>(Opcode::GetOverflowFromOp, op); return Inst<U1>(Opcode::GetOverflowFromOp, op);
} }
/// Emits an FPAdd16, FPAdd32 or FPAdd64 instruction, chosen from the operand type.
/// @param a       First operand.
/// @param b       Second operand; must have the same type as a.
/// @param control Floating-point control stored in the emitted instruction's flags.
/// @throws InvalidArgument when a and b have different types.
U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) {
    // The guard has to compare a against b: comparing a with itself is always false and lets
    // mismatched operands through to the emitted instruction.
    if (a.Type() != b.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
    }
    switch (a.Type()) {
    case Type::U16:
        return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b);
    case Type::U32:
        return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b);
    case Type::U64:
        return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b);
    default:
        ThrowInvalidType(a.Type());
    }
}
@ -191,14 +243,14 @@ U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b) {
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) {
if (e1.Type() != e2.Type()) { if (e1.Type() != e2.Type()) {
throw InvalidArgument("Incompatible types {} {}", e1.Type(), e2.Type()); throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
} }
return Inst(Opcode::CompositeConstruct2, e1, e2); return Inst(Opcode::CompositeConstruct2, e1, e2);
} }
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) {
if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type()); throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
} }
return Inst(Opcode::CompositeConstruct3, e1, e2, e3); return Inst(Opcode::CompositeConstruct3, e1, e2, e3);
} }
@ -206,8 +258,8 @@ Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny&
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3,
const UAny& e4) { const UAny& e4) {
if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
throw InvalidArgument("Incompatible types {} {} {}", e1.Type(), e2.Type(), e3.Type(), throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
e4.Type()); e3.Type(), e4.Type());
} }
return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4);
} }
@ -219,6 +271,24 @@ UAny IREmitter::CompositeExtract(const Value& vector, size_t element) {
return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element)));
} }
/// Emits a Select8/16/32/64 instruction picked from the type of the two candidate values.
/// @throws InvalidArgument when the candidate types differ or are not U8, U16, U32 or U64.
UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
    const auto value_type{true_value.Type()};
    if (value_type != false_value.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", value_type, false_value.Type());
    }
    // Resolve the opcode first so the instruction is emitted from a single place.
    const Opcode opcode{[value_type] {
        switch (value_type) {
        case Type::U8:
            return Opcode::Select8;
        case Type::U16:
            return Opcode::Select16;
        case Type::U32:
            return Opcode::Select32;
        case Type::U64:
            return Opcode::Select64;
        default:
            throw InvalidArgument("Invalid type {}", value_type);
        }
    }()};
    return Inst<UAny>(opcode, condition, true_value, false_value);
}
U64 IREmitter::PackUint2x32(const Value& vector) { U64 IREmitter::PackUint2x32(const Value& vector) {
return Inst<U64>(Opcode::PackUint2x32, vector); return Inst<U64>(Opcode::PackUint2x32, vector);
} }
@ -243,17 +313,34 @@ Value IREmitter::UnpackDouble2x32(const U64& value) {
return Inst<Value>(Opcode::UnpackDouble2x32, value); return Inst<Value>(Opcode::UnpackDouble2x32, value);
} }
U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b) { U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
} }
switch (a.Type()) { switch (a.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPMul16, a, b); return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPMul32, a, b); return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPMul64, a, b); return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b);
default:
ThrowInvalidType(a.Type());
}
}
/// Emits an FPFma16, FPFma32 or FPFma64 instruction over a, b and c, chosen from the operand
/// type, with control stored in the instruction's flags.
/// @throws InvalidArgument when the three operand types are not all equal.
U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
                           FpControl control) {
    const auto type{a.Type()};
    const bool same_types{type == b.Type() && type == c.Type()};
    if (!same_types) {
        throw InvalidArgument("Mismatching types {}, {}, and {}", type, b.Type(), c.Type());
    }
    if (type == Type::U16) {
        return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c);
    }
    if (type == Type::U32) {
        return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c);
    }
    if (type == Type::U64) {
        return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c);
    }
    ThrowInvalidType(type);
}
@ -403,6 +490,91 @@ U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) {
} }
} }
/// Emits IAdd32 or IAdd64 depending on the operand type.
/// @throws InvalidArgument when a and b have different types.
U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
    const auto type{a.Type()};
    if (type != b.Type()) {
        throw InvalidArgument("Mismatching types {} and {}", type, b.Type());
    }
    if (type == Type::U32) {
        return Inst<U32>(Opcode::IAdd32, a, b);
    }
    if (type == Type::U64) {
        return Inst<U64>(Opcode::IAdd64, a, b);
    }
    ThrowInvalidType(type);
}
/// Emits an IMul32 instruction over a and b.
U32 IREmitter::IMul(const U32& a, const U32& b) {
    constexpr Opcode opcode{Opcode::IMul32};
    return Inst<U32>(opcode, a, b);
}

/// Emits an INeg32 instruction over value.
U32 IREmitter::INeg(const U32& value) {
    constexpr Opcode opcode{Opcode::INeg32};
    return Inst<U32>(opcode, value);
}

/// Emits an IAbs32 instruction over value.
U32 IREmitter::IAbs(const U32& value) {
    constexpr Opcode opcode{Opcode::IAbs32};
    return Inst<U32>(opcode, value);
}

/// Emits a ShiftLeftLogical32 instruction with operands (base, shift).
U32 IREmitter::ShiftLeftLogical(const U32& base, const U32& shift) {
    constexpr Opcode opcode{Opcode::ShiftLeftLogical32};
    return Inst<U32>(opcode, base, shift);
}

/// Emits a ShiftRightLogical32 instruction with operands (base, shift).
U32 IREmitter::ShiftRightLogical(const U32& base, const U32& shift) {
    constexpr Opcode opcode{Opcode::ShiftRightLogical32};
    return Inst<U32>(opcode, base, shift);
}

/// Emits a ShiftRightArithmetic32 instruction with operands (base, shift).
U32 IREmitter::ShiftRightArithmetic(const U32& base, const U32& shift) {
    constexpr Opcode opcode{Opcode::ShiftRightArithmetic32};
    return Inst<U32>(opcode, base, shift);
}

/// Emits a BitwiseAnd32 instruction over a and b.
U32 IREmitter::BitwiseAnd(const U32& a, const U32& b) {
    constexpr Opcode opcode{Opcode::BitwiseAnd32};
    return Inst<U32>(opcode, a, b);
}

/// Emits a BitwiseOr32 instruction over a and b.
U32 IREmitter::BitwiseOr(const U32& a, const U32& b) {
    constexpr Opcode opcode{Opcode::BitwiseOr32};
    return Inst<U32>(opcode, a, b);
}

/// Emits a BitwiseXor32 instruction over a and b.
U32 IREmitter::BitwiseXor(const U32& a, const U32& b) {
    constexpr Opcode opcode{Opcode::BitwiseXor32};
    return Inst<U32>(opcode, a, b);
}
/// Emits a BitFieldInsert instruction with operands (base, insert, offset, count).
U32 IREmitter::BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
                              const U32& count) {
    constexpr Opcode opcode{Opcode::BitFieldInsert};
    return Inst<U32>(opcode, base, insert, offset, count);
}

/// Emits BitFieldSExtract when is_signed is set and BitFieldUExtract otherwise, with operands
/// (base, offset, count).
U32 IREmitter::BitFieldExtract(const U32& base, const U32& offset, const U32& count,
                               bool is_signed) {
    if (is_signed) {
        return Inst<U32>(Opcode::BitFieldSExtract, base, offset, count);
    }
    return Inst<U32>(Opcode::BitFieldUExtract, base, offset, count);
}
/// Emits SLessThan when is_signed is set and ULessThan otherwise.
U1 IREmitter::ILessThan(const U32& lhs, const U32& rhs, bool is_signed) {
    if (is_signed) {
        return Inst<U1>(Opcode::SLessThan, lhs, rhs);
    }
    return Inst<U1>(Opcode::ULessThan, lhs, rhs);
}

/// Emits an IEqual instruction over lhs and rhs.
U1 IREmitter::IEqual(const U32& lhs, const U32& rhs) {
    constexpr Opcode opcode{Opcode::IEqual};
    return Inst<U1>(opcode, lhs, rhs);
}

/// Emits SLessThanEqual when is_signed is set and ULessThanEqual otherwise.
U1 IREmitter::ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
    if (is_signed) {
        return Inst<U1>(Opcode::SLessThanEqual, lhs, rhs);
    }
    return Inst<U1>(Opcode::ULessThanEqual, lhs, rhs);
}

/// Emits SGreaterThan when is_signed is set and UGreaterThan otherwise.
U1 IREmitter::IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed) {
    if (is_signed) {
        return Inst<U1>(Opcode::SGreaterThan, lhs, rhs);
    }
    return Inst<U1>(Opcode::UGreaterThan, lhs, rhs);
}

/// Emits an INotEqual instruction over lhs and rhs.
U1 IREmitter::INotEqual(const U32& lhs, const U32& rhs) {
    constexpr Opcode opcode{Opcode::INotEqual};
    return Inst<U1>(opcode, lhs, rhs);
}

/// Emits SGreaterThanEqual when is_signed is set and UGreaterThanEqual otherwise.
U1 IREmitter::IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed) {
    if (is_signed) {
        return Inst<U1>(Opcode::SGreaterThanEqual, lhs, rhs);
    }
    return Inst<U1>(Opcode::UGreaterThanEqual, lhs, rhs);
}
U1 IREmitter::LogicalOr(const U1& a, const U1& b) { U1 IREmitter::LogicalOr(const U1& a, const U1& b) {
return Inst<U1>(Opcode::LogicalOr, a, b); return Inst<U1>(Opcode::LogicalOr, a, b);
} }
@ -411,6 +583,10 @@ U1 IREmitter::LogicalAnd(const U1& a, const U1& b) {
return Inst<U1>(Opcode::LogicalAnd, a, b); return Inst<U1>(Opcode::LogicalAnd, a, b);
} }
/// Emits a LogicalXor instruction over the two U1 operands.
U1 IREmitter::LogicalXor(const U1& a, const U1& b) {
    constexpr Opcode opcode{Opcode::LogicalXor};
    return Inst<U1>(opcode, a, b);
}
U1 IREmitter::LogicalNot(const U1& value) { U1 IREmitter::LogicalNot(const U1& value) {
return Inst<U1>(Opcode::LogicalNot, value); return Inst<U1>(Opcode::LogicalNot, value);
} }

View File

@ -4,8 +4,12 @@
#pragma once #pragma once
#include <cstring>
#include <type_traits>
#include "shader_recompiler/frontend/ir/attribute.h" #include "shader_recompiler/frontend/ir/attribute.h"
#include "shader_recompiler/frontend/ir/basic_block.h" #include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/ir/value.h" #include "shader_recompiler/frontend/ir/value.h"
namespace Shader::IR { namespace Shader::IR {
@ -52,6 +56,22 @@ public:
[[nodiscard]] U32 GetAttribute(IR::Attribute attribute); [[nodiscard]] U32 GetAttribute(IR::Attribute attribute);
void SetAttribute(IR::Attribute attribute, const U32& value); void SetAttribute(IR::Attribute attribute, const U32& value);
/// Each of these emits the opcode of the same name, takes no operand and returns a U32.
[[nodiscard]] U32 WorkgroupIdX();
[[nodiscard]] U32 WorkgroupIdY();
[[nodiscard]] U32 WorkgroupIdZ();
[[nodiscard]] U32 LocalInvocationIdX();
[[nodiscard]] U32 LocalInvocationIdY();
[[nodiscard]] U32 LocalInvocationIdZ();
/// Each of these emits the LoadGlobal* opcode of the same name with address as its operand.
/// The 64 and 128 variants return an untyped Value instead of a U32.
[[nodiscard]] U32 LoadGlobalU8(const U64& address);
[[nodiscard]] U32 LoadGlobalS8(const U64& address);
[[nodiscard]] U32 LoadGlobalU16(const U64& address);
[[nodiscard]] U32 LoadGlobalS16(const U64& address);
[[nodiscard]] U32 LoadGlobal32(const U64& address);
[[nodiscard]] Value LoadGlobal64(const U64& address);
[[nodiscard]] Value LoadGlobal128(const U64& address);
void WriteGlobalU8(const U64& address, const U32& value); void WriteGlobalU8(const U64& address, const U32& value);
void WriteGlobalS8(const U64& address, const U32& value); void WriteGlobalS8(const U64& address, const U32& value);
void WriteGlobalU16(const U64& address, const U32& value); void WriteGlobalU16(const U64& address, const U32& value);
@ -71,6 +91,8 @@ public:
const UAny& e4); const UAny& e4);
[[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); [[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
[[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] U64 PackUint2x32(const Value& vector);
[[nodiscard]] Value UnpackUint2x32(const U64& value); [[nodiscard]] Value UnpackUint2x32(const U64& value);
@ -80,8 +102,10 @@ public:
[[nodiscard]] U64 PackDouble2x32(const Value& vector); [[nodiscard]] U64 PackDouble2x32(const Value& vector);
[[nodiscard]] Value UnpackDouble2x32(const U64& value); [[nodiscard]] Value UnpackDouble2x32(const U64& value);
[[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b); [[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
[[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b); [[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {});
[[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c,
FpControl control = {});
[[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); [[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value);
[[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); [[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value);
@ -100,8 +124,31 @@ public:
[[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); [[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value);
[[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); [[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value);
/// Emits IAdd32 or IAdd64 depending on the operand type; throws InvalidArgument when the
/// types of a and b differ.
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
/// The 32-bit emitters below each emit the opcode named after them with a "32" suffix.
[[nodiscard]] U32 IMul(const U32& a, const U32& b);
[[nodiscard]] U32 INeg(const U32& value);
[[nodiscard]] U32 IAbs(const U32& value);
[[nodiscard]] U32 ShiftLeftLogical(const U32& base, const U32& shift);
[[nodiscard]] U32 ShiftRightLogical(const U32& base, const U32& shift);
[[nodiscard]] U32 ShiftRightArithmetic(const U32& base, const U32& shift);
[[nodiscard]] U32 BitwiseAnd(const U32& a, const U32& b);
[[nodiscard]] U32 BitwiseOr(const U32& a, const U32& b);
[[nodiscard]] U32 BitwiseXor(const U32& a, const U32& b);
/// Emits BitFieldInsert with operands (base, insert, offset, count).
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
const U32& count);
/// Emits BitFieldSExtract when is_signed is true, BitFieldUExtract otherwise.
[[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
bool is_signed);
/// Comparisons returning U1. Where is_signed is taken, it selects the S-prefixed opcode when
/// true and the U-prefixed opcode when false; IEqual and INotEqual have a single opcode.
[[nodiscard]] U1 ILessThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IEqual(const U32& lhs, const U32& rhs);
[[nodiscard]] U1 ILessThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 IGreaterThan(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 INotEqual(const U32& lhs, const U32& rhs);
[[nodiscard]] U1 IGreaterThanEqual(const U32& lhs, const U32& rhs, bool is_signed);
[[nodiscard]] U1 LogicalOr(const U1& a, const U1& b); [[nodiscard]] U1 LogicalOr(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b); [[nodiscard]] U1 LogicalAnd(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalNot(const U1& value); [[nodiscard]] U1 LogicalNot(const U1& value);
[[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value);
@ -118,6 +165,22 @@ private:
auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})}; auto it{block.PrependNewInst(insertion_point, op, {Value{args}...})};
return T{Value{&*it}}; return T{Value{&*it}};
} }
/// Tag wrapper that marks a value as instruction flags when it is passed to Inst().
/// T must be trivially copyable and no larger than u64, because Inst() copies it byte-wise
/// into a u64.
template <typename T>
requires(sizeof(T) <= sizeof(u64) && std::is_trivially_copyable_v<T>) struct Flags {
Flags() = default;
Flags(T proxy_) : proxy{proxy_} {}
T proxy; // Wrapped flags value
};
/// Emits op before the insertion point with the given arguments and flags, returning the new
/// instruction wrapped as T.
template <typename T = Value, typename FlagType, typename... Args>
T Inst(Opcode op, Flags<FlagType> flags, Args... args) {
    // Start from zero so the bytes past sizeof(FlagType) have a defined value.
    u64 packed_flags{0};
    std::memcpy(&packed_flags, &flags.proxy, sizeof(FlagType));
    auto inserted{block.PrependNewInst(insertion_point, op, {Value{args}...}, packed_flags)};
    return T{Value{&*inserted}};
}
}; };
} // namespace Shader::IR } // namespace Shader::IR

View File

@ -5,7 +5,9 @@
#pragma once #pragma once
#include <array> #include <array>
#include <cstring>
#include <span> #include <span>
#include <type_traits>
#include <vector> #include <vector>
#include <boost/intrusive/list.hpp> #include <boost/intrusive/list.hpp>
@ -23,7 +25,7 @@ constexpr size_t MAX_ARG_COUNT = 4;
class Inst : public boost::intrusive::list_base_hook<> { class Inst : public boost::intrusive::list_base_hook<> {
public: public:
explicit Inst(Opcode op_) noexcept : op(op_) {} explicit Inst(Opcode op_, u64 flags_) noexcept : op{op_}, flags{flags_} {}
/// Get the number of uses this instruction has. /// Get the number of uses this instruction has.
[[nodiscard]] int UseCount() const noexcept { [[nodiscard]] int UseCount() const noexcept {
@ -73,6 +75,14 @@ public:
void ReplaceUsesWith(Value replacement); void ReplaceUsesWith(Value replacement);
/// Returns the stored raw flags reinterpreted as FlagsType by copying its leading
/// sizeof(FlagsType) bytes.
template <typename FlagsType>
requires(sizeof(FlagsType) <= sizeof(u64) && std::is_trivially_copyable_v<FlagsType>)
[[nodiscard]] FlagsType Flags() const noexcept {
    FlagsType decoded;
    std::memcpy(&decoded, &flags, sizeof(FlagsType));
    return decoded;
}
private: private:
void Use(const Value& value); void Use(const Value& value);
void UndoUse(const Value& value); void UndoUse(const Value& value);

View File

@ -0,0 +1,28 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
namespace Shader::IR {
/// Denormal and NaN handling mode carried by FpControl.
enum class FmzMode {
None, // Denorms are not flushed, NAN is propagated (nouveau)
FTZ, // Flush denorms to zero, NAN is propagated (D3D11, NVN, GL, VK)
FMZ, // Flush denorms to zero, x * 0 == 0 (D3D9)
};
/// Rounding mode carried by FpControl.
enum class FpRounding {
RN, // Round to nearest even
RM, // Round towards negative infinity
RP, // Round towards positive infinity
RZ, // Round towards zero
};
/// Per-instruction floating-point control. IREmitter copies it byte-wise into the u64 flags of
/// FPAdd, FPMul and FPFma instructions.
struct FpControl {
bool no_contraction{false};
FpRounding rounding : 8 = FpRounding::RN;
FmzMode fmz_mode : 8 = FmzMode::FTZ;
};
// The struct has to fit in the 64-bit raw flags it is copied into.
// NOTE(review): u64 is not declared by this header; presumably it relies on
// common/common_types.h having been included first - confirm.
static_assert(sizeof(FpControl) <= sizeof(u64));
} // namespace Shader::IR

View File

@ -35,6 +35,12 @@ OPCODE(SetZFlag, Void, U1,
OPCODE(SetSFlag, Void, U1, ) OPCODE(SetSFlag, Void, U1, )
OPCODE(SetCFlag, Void, U1, ) OPCODE(SetCFlag, Void, U1, )
OPCODE(SetOFlag, Void, U1, ) OPCODE(SetOFlag, Void, U1, )
// Workgroup and local invocation ID components; each row lists a single U32 type and no operand
OPCODE(WorkgroupIdX, U32, )
OPCODE(WorkgroupIdY, U32, )
OPCODE(WorkgroupIdZ, U32, )
OPCODE(LocalInvocationIdX, U32, )
OPCODE(LocalInvocationIdY, U32, )
OPCODE(LocalInvocationIdZ, U32, )
// Undefined // Undefined
OPCODE(Undef1, U1, ) OPCODE(Undef1, U1, )
@ -44,6 +50,13 @@ OPCODE(Undef32, U32,
OPCODE(Undef64, U64, ) OPCODE(Undef64, U64, )
// Memory operations // Memory operations
// Loads take a single U64 operand (the address passed by IREmitter::LoadGlobal*);
// the 64 and 128 variants are typed Opaque instead of U32
OPCODE(LoadGlobalU8, U32, U64, )
OPCODE(LoadGlobalS8, U32, U64, )
OPCODE(LoadGlobalU16, U32, U64, )
OPCODE(LoadGlobalS16, U32, U64, )
OPCODE(LoadGlobal32, U32, U64, )
OPCODE(LoadGlobal64, Opaque, U64, )
OPCODE(LoadGlobal128, Opaque, U64, )
OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalU8, Void, U64, U32, )
OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, )
OPCODE(WriteGlobalU16, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, )
@ -58,6 +71,12 @@ OPCODE(CompositeConstruct3, Opaque, Opaq
OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, )
OPCODE(CompositeExtract, Opaque, Opaque, U32, ) OPCODE(CompositeExtract, Opaque, Opaque, U32, )
// Select operations
// Operands as passed by IREmitter::Select: U1 condition, true_value, false_value
OPCODE(Select8, U8, U1, U8, U8, )
OPCODE(Select16, U16, U1, U16, U16, )
OPCODE(Select32, U32, U1, U32, U32, )
OPCODE(Select64, U64, U1, U64, U64, )
// Bitwise conversions // Bitwise conversions
OPCODE(PackUint2x32, U64, Opaque, ) OPCODE(PackUint2x32, U64, Opaque, )
OPCODE(UnpackUint2x32, Opaque, U64, ) OPCODE(UnpackUint2x32, Opaque, U64, )
@ -74,56 +93,84 @@ OPCODE(GetOverflowFromOp, U1, Opaq
OPCODE(GetZSCOFromOp, ZSCO, Opaque, ) OPCODE(GetZSCOFromOp, ZSCO, Opaque, )
// Floating-point operations // Floating-point operations
OPCODE(FPAbs16, U16, U16 ) OPCODE(FPAbs16, U16, U16, )
OPCODE(FPAbs32, U32, U32 ) OPCODE(FPAbs32, U32, U32, )
OPCODE(FPAbs64, U64, U64 ) OPCODE(FPAbs64, U64, U64, )
OPCODE(FPAdd16, U16, U16, U16 ) OPCODE(FPAdd16, U16, U16, U16, )
OPCODE(FPAdd32, U32, U32, U32 ) OPCODE(FPAdd32, U32, U32, U32, )
OPCODE(FPAdd64, U64, U64, U64 ) OPCODE(FPAdd64, U64, U64, U64, )
OPCODE(FPFma16, U16, U16, U16 ) OPCODE(FPFma16, U16, U16, U16, U16, )
OPCODE(FPFma32, U32, U32, U32 ) OPCODE(FPFma32, U32, U32, U32, U32, )
OPCODE(FPFma64, U64, U64, U64 ) OPCODE(FPFma64, U64, U64, U64, U64, )
OPCODE(FPMax32, U32, U32, U32 ) OPCODE(FPMax32, U32, U32, U32, )
OPCODE(FPMax64, U64, U64, U64 ) OPCODE(FPMax64, U64, U64, U64, )
OPCODE(FPMin32, U32, U32, U32 ) OPCODE(FPMin32, U32, U32, U32, )
OPCODE(FPMin64, U64, U64, U64 ) OPCODE(FPMin64, U64, U64, U64, )
OPCODE(FPMul16, U16, U16, U16 ) OPCODE(FPMul16, U16, U16, U16, )
OPCODE(FPMul32, U32, U32, U32 ) OPCODE(FPMul32, U32, U32, U32, )
OPCODE(FPMul64, U64, U64, U64 ) OPCODE(FPMul64, U64, U64, U64, )
OPCODE(FPNeg16, U16, U16 ) OPCODE(FPNeg16, U16, U16, )
OPCODE(FPNeg32, U32, U32 ) OPCODE(FPNeg32, U32, U32, )
OPCODE(FPNeg64, U64, U64 ) OPCODE(FPNeg64, U64, U64, )
OPCODE(FPRecip32, U32, U32 ) OPCODE(FPRecip32, U32, U32, )
OPCODE(FPRecip64, U64, U64 ) OPCODE(FPRecip64, U64, U64, )
OPCODE(FPRecipSqrt32, U32, U32 ) OPCODE(FPRecipSqrt32, U32, U32, )
OPCODE(FPRecipSqrt64, U64, U64 ) OPCODE(FPRecipSqrt64, U64, U64, )
OPCODE(FPSqrt, U32, U32 ) OPCODE(FPSqrt, U32, U32, )
OPCODE(FPSin, U32, U32 ) OPCODE(FPSin, U32, U32, )
OPCODE(FPSinNotReduced, U32, U32 ) OPCODE(FPSinNotReduced, U32, U32, )
OPCODE(FPExp2, U32, U32 ) OPCODE(FPExp2, U32, U32, )
OPCODE(FPExp2NotReduced, U32, U32 ) OPCODE(FPExp2NotReduced, U32, U32, )
OPCODE(FPCos, U32, U32 ) OPCODE(FPCos, U32, U32, )
OPCODE(FPCosNotReduced, U32, U32 ) OPCODE(FPCosNotReduced, U32, U32, )
OPCODE(FPLog2, U32, U32 ) OPCODE(FPLog2, U32, U32, )
OPCODE(FPSaturate16, U16, U16 ) OPCODE(FPSaturate16, U16, U16, )
OPCODE(FPSaturate32, U32, U32 ) OPCODE(FPSaturate32, U32, U32, )
OPCODE(FPSaturate64, U64, U64 ) OPCODE(FPSaturate64, U64, U64, )
OPCODE(FPRoundEven16, U16, U16 ) OPCODE(FPRoundEven16, U16, U16, )
OPCODE(FPRoundEven32, U32, U32 ) OPCODE(FPRoundEven32, U32, U32, )
OPCODE(FPRoundEven64, U64, U64 ) OPCODE(FPRoundEven64, U64, U64, )
OPCODE(FPFloor16, U16, U16 ) OPCODE(FPFloor16, U16, U16, )
OPCODE(FPFloor32, U32, U32 ) OPCODE(FPFloor32, U32, U32, )
OPCODE(FPFloor64, U64, U64 ) OPCODE(FPFloor64, U64, U64, )
OPCODE(FPCeil16, U16, U16 ) OPCODE(FPCeil16, U16, U16, )
OPCODE(FPCeil32, U32, U32 ) OPCODE(FPCeil32, U32, U32, )
OPCODE(FPCeil64, U64, U64 ) OPCODE(FPCeil64, U64, U64, )
OPCODE(FPTrunc16, U16, U16 ) OPCODE(FPTrunc16, U16, U16, )
OPCODE(FPTrunc32, U32, U32 ) OPCODE(FPTrunc32, U32, U32, )
OPCODE(FPTrunc64, U64, U64 ) OPCODE(FPTrunc64, U64, U64, )
// Integer operations
// S- and U-prefixed rows are the two variants IREmitter chooses between with its is_signed
// argument (S when true, U when false)
OPCODE(IAdd32, U32, U32, U32, )
OPCODE(IAdd64, U64, U64, U64, )
OPCODE(IMul32, U32, U32, U32, )
OPCODE(INeg32, U32, U32, )
OPCODE(IAbs32, U32, U32, )
OPCODE(ShiftLeftLogical32, U32, U32, U32, )
OPCODE(ShiftRightLogical32, U32, U32, U32, )
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
OPCODE(BitwiseAnd32, U32, U32, U32, )
OPCODE(BitwiseOr32, U32, U32, U32, )
OPCODE(BitwiseXor32, U32, U32, U32, )
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
OPCODE(SLessThan, U1, U32, U32, )
OPCODE(ULessThan, U1, U32, U32, )
OPCODE(IEqual, U1, U32, U32, )
OPCODE(SLessThanEqual, U1, U32, U32, )
OPCODE(ULessThanEqual, U1, U32, U32, )
OPCODE(SGreaterThan, U1, U32, U32, )
OPCODE(UGreaterThan, U1, U32, U32, )
OPCODE(INotEqual, U1, U32, U32, )
OPCODE(SGreaterThanEqual, U1, U32, U32, )
OPCODE(UGreaterThanEqual, U1, U32, U32, )
// Logical operations // Logical operations
OPCODE(LogicalOr, U1, U1, U1, ) OPCODE(LogicalOr, U1, U1, U1, )
OPCODE(LogicalAnd, U1, U1, U1, ) OPCODE(LogicalAnd, U1, U1, U1, )
OPCODE(LogicalXor, U1, U1, U1, )
OPCODE(LogicalNot, U1, U1, ) OPCODE(LogicalNot, U1, U1, )
// Conversion operations // Conversion operations

View File

@ -8,7 +8,16 @@
namespace Shader::IR { namespace Shader::IR {
enum class Pred { P0, P1, P2, P3, P4, P5, P6, PT }; enum class Pred : u64 {
P0,
P1,
P2,
P3,
P4,
P5,
P6,
PT,
};
constexpr size_t NUM_USER_PREDS = 6; constexpr size_t NUM_USER_PREDS = 6;
constexpr size_t NUM_PREDS = 7; constexpr size_t NUM_PREDS = 7;

View File

@ -56,6 +56,7 @@ Program::Program(Environment& env, const Flow::CFG& cfg) {
Optimization::Invoke(Optimization::IdentityRemovalPass, function); Optimization::Invoke(Optimization::IdentityRemovalPass, function);
// Optimization::Invoke(Optimization::VerificationPass, function); // Optimization::Invoke(Optimization::VerificationPass, function);
} }
//*/
} }
std::string DumpProgram(const Program& program) { std::string DumpProgram(const Program& program) {

View File

@ -0,0 +1,56 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
namespace Shader::Maxwell {

/// Rounding mode as it appears in an instruction's bit field.
enum class FpRounding : u64 { RN, RM, RP, RZ };

/// FMZ mode as it appears in an instruction's bit field. INVALIDFMZ3 has no IR counterpart.
enum class FmzMode : u64 { None, FTZ, FMZ, INVALIDFMZ3 };

/// Maps an encoded rounding mode to the IR rounding mode of the same name.
/// @throws NotImplementedException for any value outside RN, RM, RP and RZ.
inline IR::FpRounding CastFpRounding(FpRounding fp_rounding) {
    if (fp_rounding == FpRounding::RN) {
        return IR::FpRounding::RN;
    }
    if (fp_rounding == FpRounding::RM) {
        return IR::FpRounding::RM;
    }
    if (fp_rounding == FpRounding::RP) {
        return IR::FpRounding::RP;
    }
    if (fp_rounding == FpRounding::RZ) {
        return IR::FpRounding::RZ;
    }
    throw NotImplementedException("Invalid floating-point rounding {}", fp_rounding);
}

/// Maps an encoded FMZ mode to the IR FMZ mode of the same name.
/// @throws NotImplementedException for INVALIDFMZ3 and any other unmapped value.
inline IR::FmzMode CastFmzMode(FmzMode fmz_mode) {
    switch (fmz_mode) {
    case FmzMode::None:
        return IR::FmzMode::None;
    case FmzMode::FTZ:
        return IR::FmzMode::FTZ;
    case FmzMode::FMZ:
        return IR::FmzMode::FMZ;
    default:
        // INVALIDFMZ3 and anything else is rejected.
        throw NotImplementedException("Invalid FMZ mode {}", fmz_mode);
    }
}

} // namespace Shader::Maxwell

View File

@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_a;
} const fadd{insn};
if (sat) {
throw NotImplementedException("FADD SAT");
}
if (cc) {
throw NotImplementedException("FADD CC");
}
const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)};
const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
IR::FpControl control{
.no_contraction{true},
.rounding{CastFpRounding(fp_rounding)},
.fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
};
v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
}
void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
union {
u64 raw;
BitField<39, 2, FpRounding> fp_rounding;
BitField<44, 1, u64> ftz;
BitField<45, 1, u64> neg_b;
BitField<46, 1, u64> abs_a;
BitField<47, 1, u64> cc;
BitField<48, 1, u64> neg_a;
BitField<49, 1, u64> abs_b;
BitField<50, 1, u64> sat;
} const fadd{insn};
FADD(v, insn, fadd.sat != 0, fadd.cc != 0, fadd.ftz != 0, fadd.fp_rounding, src_b,
fadd.abs_a != 0, fadd.neg_a != 0, fadd.abs_b != 0, fadd.neg_b != 0);
}
} // Anonymous namespace
// FADD (register form): operand B is fetched with GetReg20 and the shared FADD helper does the rest.
void TranslatorVisitor::FADD_reg(u64 insn) {
FADD(*this, insn, GetReg20(insn));
}
// FADD (cbuf) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FADD_cbuf(u64) {
throw NotImplementedException("FADD (cbuf)");
}
// FADD (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FADD_imm(u64) {
throw NotImplementedException("FADD (imm)");
}
// FADD32I encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FADD32I(u64) {
throw NotImplementedException("FADD32I");
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,73 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Emits the IR for a 32-bit floating-point fused multiply-add.
///
/// Operand A comes from the register encoded in bits [8, 16); the result is stored in the
/// register encoded in bits [0, 8). Every operand may be negated (no absolute value is applied).
/// Throws NotImplementedException when saturation (sat) or condition codes (cc) are requested.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a,
          bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const encoding{insn};

    // Unsupported modifiers are rejected before any IR is emitted
    if (sat) {
        throw NotImplementedException("FFMA SAT");
    }
    if (cc) {
        throw NotImplementedException("FFMA CC");
    }

    // Only negation is available on FFMA operands, hence abs is always false
    constexpr bool no_abs{false};
    const IR::U32 multiplicand{v.ir.FPAbsNeg(v.X(encoding.src_a), no_abs, neg_a)};
    const IR::U32 multiplier{v.ir.FPAbsNeg(src_b, no_abs, neg_b)};
    const IR::U32 addend{v.ir.FPAbsNeg(src_c, no_abs, neg_c)};

    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    v.X(encoding.dest_reg, v.ir.FPFma(multiplicand, multiplier, addend, control));
}
/// Decodes the modifier bits shared by the FFMA encodings handled here and forwards them,
/// together with the already fetched operands B and C, to the core FFMA emitter.
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) {
    union {
        u64 raw;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_c;
        BitField<50, 1, u64> sat;
        BitField<51, 2, FpRounding> fp_rounding;
        BitField<53, 2, FmzMode> fmz_mode;
    } const bits{insn};

    // No negate bit for operand A is decoded in this form, so it is always passed as false
    constexpr bool neg_a{false};
    const bool neg_b{bits.neg_b != 0};
    const bool neg_c{bits.neg_c != 0};
    const bool sat{bits.sat != 0};
    const bool cc{bits.cc != 0};

    FFMA(v, insn, src_b, src_c, neg_a, neg_b, neg_c, sat, cc, bits.fmz_mode, bits.fp_rounding);
}
} // Anonymous namespace
// FFMA (register form): operand B is fetched with GetReg20 and operand C with GetReg39.
void TranslatorVisitor::FFMA_reg(u64 insn) {
FFMA(*this, insn, GetReg20(insn), GetReg39(insn));
}
// FFMA (rc) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FFMA_rc(u64) {
throw NotImplementedException("FFMA (rc)");
}
// FFMA (cr form): operand B is fetched with GetCbuf and operand C with GetReg39.
void TranslatorVisitor::FFMA_cr(u64 insn) {
FFMA(*this, insn, GetCbuf(insn), GetReg39(insn));
}
// FFMA (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FFMA_imm(u64) {
throw NotImplementedException("FFMA (imm)");
}
// FFMA32I encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FFMA32I(u64) {
throw NotImplementedException("FFMA32I");
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,108 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Raw encoding of the FMUL scale modifier (decoded from a 3-bit field).
/// "D" entries divide and "M" entries multiply by the given power of two (see ScaleFactor).
enum class Scale : u64 {
    None = 0,
    D2 = 1,
    D4 = 2,
    D8 = 3,
    M8 = 4,
    M4 = 5,
    M2 = 6,
    INVALIDSCALE37 = 7, // Rejected by ScaleFactor
};
/// Returns the floating-point factor that corresponds to an FMUL scale modifier.
/// Throws NotImplementedException for the invalid encoding or any unknown value.
float ScaleFactor(Scale scale) {
    switch (scale) {
    // Multiplying scales
    case Scale::M2:
        return 2.0f;
    case Scale::M4:
        return 4.0f;
    case Scale::M8:
        return 8.0f;
    // Dividing scales (all exactly representable)
    case Scale::D2:
        return 0.5f;
    case Scale::D4:
        return 0.25f;
    case Scale::D8:
        return 0.125f;
    // No scaling requested
    case Scale::None:
        return 1.0f;
    case Scale::INVALIDSCALE37:
        break;
    }
    throw NotImplementedException("Invalid FMUL scale {}", scale);
}
/// Emits the IR for a 32-bit floating-point multiplication.
///
/// Operand A is read from the register encoded in bits [8, 16) and optionally pre-multiplied by
/// the scale factor; operand B may be negated. The product is written to the register encoded in
/// bits [0, 8). Throws NotImplementedException for CC, SAT, and for a scale combined with
/// anything other than FTZ + round-to-nearest.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode,
          FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const encoding{insn};

    if (cc) {
        throw NotImplementedException("FMUL CC");
    }
    if (sat) {
        throw NotImplementedException("FMUL SAT");
    }

    IR::U32 multiplicand{v.X(encoding.src_a)};
    const bool has_scale{scale != Scale::None};
    if (has_scale) {
        // NOTE(review): the check requires FTZ while the message mentions FMZ - confirm intent
        const bool is_supported{fmz_mode == FmzMode::FTZ && fp_rounding == FpRounding::RN};
        if (!is_supported) {
            throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
        }
        // The scale is applied to operand A with a separate multiplication
        multiplicand = v.ir.FPMul(multiplicand, v.ir.Imm32(ScaleFactor(scale)));
    }

    const IR::U32 multiplier{v.ir.FPAbsNeg(src_b, false, neg_b)};
    const IR::FpControl control{
        .no_contraction = true,
        .rounding = CastFpRounding(fp_rounding),
        .fmz_mode = CastFmzMode(fmz_mode),
    };
    v.X(encoding.dest_reg, v.ir.FPMul(multiplicand, multiplier, control));
}
/// Decodes the modifier bits shared by the non-immediate FMUL encodings and forwards them,
/// together with the already fetched operand B, to the core FMUL emitter.
void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<39, 2, FpRounding> fp_rounding;
        BitField<41, 3, Scale> scale;
        BitField<44, 2, FmzMode> fmz;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> neg_b;
        BitField<50, 1, u64> sat;
    } const bits{insn};

    const bool sat{bits.sat != 0};
    const bool cc{bits.cc != 0};
    const bool neg_b{bits.neg_b != 0};

    FMUL(v, insn, src_b, bits.fmz, bits.fp_rounding, bits.scale, sat, cc, neg_b);
}
} // Anonymous namespace
// FMUL (register form): operand B is fetched with GetReg20 and the shared FMUL helper does the rest.
void TranslatorVisitor::FMUL_reg(u64 insn) {
    const IR::U32 src_b{GetReg20(insn)};
    FMUL(*this, insn, src_b);
}
// FMUL (cbuf) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FMUL_cbuf(u64) {
throw NotImplementedException("FMUL (cbuf)");
}
// FMUL (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FMUL_imm(u64) {
throw NotImplementedException("FMUL (imm)");
}
// FMUL32I encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::FMUL32I(u64) {
throw NotImplementedException("FMUL32I");
}
} // namespace Shader::Maxwell

View File

@ -16,6 +16,22 @@ void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
ir.SetReg(dest_reg, value); ir.SetReg(dest_reg, value);
} }
/// Reads the general-purpose register whose index is encoded in the 8 bits starting at bit 20.
IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
    union {
        u64 raw;
        BitField<20, 8, IR::Reg> index;
    } const encoding{insn};
    const IR::Reg source{encoding.index};
    return X(source);
}
/// Reads the general-purpose register whose index is encoded in the 8 bits starting at bit 39.
IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
    union {
        u64 raw;
        BitField<39, 8, IR::Reg> index;
    } const encoding{insn};
    const IR::Reg source{encoding.index};
    return X(source);
}
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
union { union {
u64 raw; u64 raw;
@ -33,7 +49,7 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
return ir.GetCbuf(binding, byte_offset); return ir.GetCbuf(binding, byte_offset);
} }
IR::U32 TranslatorVisitor::GetImm(u64 insn) { IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
union { union {
u64 raw; u64 raw;
BitField<20, 19, u64> value; BitField<20, 19, u64> value;
@ -44,6 +60,14 @@ IR::U32 TranslatorVisitor::GetImm(u64 insn) {
return ir.Imm32(value); return ir.Imm32(value);
} }
/// Builds a 32-bit immediate from the 32 bits of the instruction that start at bit 20.
IR::U32 TranslatorVisitor::GetImm32(u64 insn) {
    union {
        u64 raw;
        BitField<20, 32, u64> value;
    } const encoding{insn};
    // The field is exactly 32 bits wide, so the narrowing cast does not drop any set bit
    const u32 immediate{static_cast<u32>(encoding.value)};
    return ir.Imm32(immediate);
}
void TranslatorVisitor::SetZFlag(const IR::U1& value) { void TranslatorVisitor::SetZFlag(const IR::U1& value) {
ir.SetZFlag(value); ir.SetZFlag(value);
} }

View File

@ -46,7 +46,7 @@ public:
void DADD_reg(u64 insn); void DADD_reg(u64 insn);
void DADD_cbuf(u64 insn); void DADD_cbuf(u64 insn);
void DADD_imm(u64 insn); void DADD_imm(u64 insn);
void DEPBAR(u64 insn); void DEPBAR();
void DFMA_reg(u64 insn); void DFMA_reg(u64 insn);
void DFMA_rc(u64 insn); void DFMA_rc(u64 insn);
void DFMA_cr(u64 insn); void DFMA_cr(u64 insn);
@ -298,9 +298,14 @@ public:
[[nodiscard]] IR::U32 X(IR::Reg reg); [[nodiscard]] IR::U32 X(IR::Reg reg);
void X(IR::Reg dest_reg, const IR::U32& value); void X(IR::Reg dest_reg, const IR::U32& value);
[[nodiscard]] IR::U32 GetReg20(u64 insn);
[[nodiscard]] IR::U32 GetReg39(u64 insn);
[[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn);
[[nodiscard]] IR::U32 GetImm(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn);
[[nodiscard]] IR::U32 GetImm32(u64 insn);
void SetZFlag(const IR::U1& value); void SetZFlag(const IR::U1& value);
void SetSFlag(const IR::U1& value); void SetSFlag(const IR::U1& value);

View File

@ -0,0 +1,106 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Emits the IR for a 32-bit integer addition.
///
/// Operand A is read from the register encoded in bits [8, 16) and negated when neg_a is set.
/// op_b must already have its own negation applied by the caller. With x the carry flag is
/// added in, with po one is added, and with cc the Z/S/C/O flags are updated from the result.
/// Throws NotImplementedException for SAT, X+PO, CC+PO and X+CC.
void IADD(TranslatorVisitor& v, u64 insn, const IR::U32 op_b, bool neg_a, bool po, bool sat, bool x,
          bool cc) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const encoding{insn};

    if (sat) {
        throw NotImplementedException("IADD SAT");
    }
    if (x && po) {
        throw NotImplementedException("IADD X+PO");
    }

    // Fetch operand A and negate it on demand
    const IR::U32 raw_a{v.X(encoding.src_a)};
    const IR::U32 lhs{neg_a ? IR::U32{v.ir.INeg(raw_a)} : raw_a};

    IR::U32 sum{v.ir.IAdd(lhs, op_b)};
    if (x) {
        // Extended add: fold the incoming carry flag into the sum as 0 or 1
        const IR::U32 carry_in{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
        sum = v.ir.IAdd(sum, carry_in);
    }
    if (po) {
        // Plus-one modifier
        sum = v.ir.IAdd(sum, v.ir.Imm32(1));
    }

    // TODO: Does CC grab the result pre-PO or after?
    if (cc && po) {
        throw NotImplementedException("IADD CC+PO");
    }
    // TODO: How does CC behave when X is set?
    if (cc && x) {
        throw NotImplementedException("IADD X+CC");
    }
    if (cc) {
        // Derive every condition flag from the final result
        v.SetZFlag(v.ir.GetZeroFromOp(sum));
        v.SetSFlag(v.ir.GetSignFromOp(sum));
        v.SetCFlag(v.ir.GetCarryFromOp(sum));
        v.SetOFlag(v.ir.GetOverflowFromOp(sum));
    }

    v.X(encoding.dest_reg, sum);
}
/// Decodes the modifier bits shared by the non-immediate IADD encodings and forwards them,
/// together with operand B, to the core IADD emitter.
///
/// Bits 48 and 49 are overloaded: when both are set they mean ".PO" (plus one), otherwise each
/// one individually requests the negation of operand B (bit 48) or operand A (bit 49).
void IADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
        BitField<50, 1, u64> sat;
    } const iadd{insn};

    const bool po{iadd.three_for_po == 3};
    // The negate bits only keep their meaning when .PO is not encoded
    const bool neg_a{!po && iadd.neg_a != 0};
    const bool neg_b{!po && iadd.neg_b != 0};
    if (neg_b) {
        op_b = v.ir.INeg(op_b);
    }
    // Pass the masked neg_a: the raw bit is also set for .PO and must not negate operand A then
    IADD(v, insn, op_b, neg_a, po, iadd.sat != 0, iadd.x != 0, iadd.cc != 0);
}
} // Anonymous namespace
// IADD (reg) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::IADD_reg(u64) {
throw NotImplementedException("IADD (reg)");
}
// IADD (cbuf form): operand B is fetched with GetCbuf and the shared IADD helper does the rest.
void TranslatorVisitor::IADD_cbuf(u64 insn) {
IADD(*this, insn, GetCbuf(insn));
}
// IADD (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::IADD_imm(u64) {
throw NotImplementedException("IADD (imm)");
}
/// IADD32I: integer add whose operand B is the 32-bit immediate returned by GetImm32.
/// Bits 55-56 both set mean ".PO"; otherwise bit 56 requests the negation of operand A.
void TranslatorVisitor::IADD32I(u64 insn) {
    union {
        u64 raw;
        BitField<52, 1, u64> cc;
        BitField<53, 1, u64> x;
        BitField<54, 1, u64> sat;
        BitField<55, 2, u64> three_for_po;
        BitField<56, 1, u64> neg_a;
    } const bits{insn};

    const bool po{bits.three_for_po == 3};
    // The negate bit is part of the .PO pattern, so it only counts when .PO is absent
    const bool neg_a{po ? false : bits.neg_a != 0};
    const bool sat{bits.sat != 0};
    const bool x{bits.x != 0};
    const bool cc{bits.cc != 0};

    IADD(*this, insn, GetImm32(insn), neg_a, po, sat, x, cc);
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,73 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Emits the IR for an integer scaled add: dest = (a << scale) + b, plus one with .PO.
///
/// Bits 48 and 49 mean ".PO" when both are set; otherwise they individually negate operand B
/// (bit 48) and operand A (bit 49). The result is stored before the CC modifier is rejected
/// with NotImplementedException.
void ISCADD(TranslatorVisitor& v, u64 insn, IR::U32 op_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> op_a;
        BitField<39, 5, u64> scale;
        BitField<47, 1, u64> cc;
        BitField<48, 2, u64> three_for_po;
        BitField<48, 1, u64> neg_b;
        BitField<49, 1, u64> neg_a;
    } const encoding{insn};

    const bool po{encoding.three_for_po == 3};
    // Without .PO the two bits act as independent negate flags
    const bool negate_a{!po && encoding.neg_a != 0};
    const bool negate_b{!po && encoding.neg_b != 0};

    IR::U32 op_a{v.X(encoding.op_a)};
    if (negate_a) {
        op_a = v.ir.INeg(op_a);
    }
    if (negate_b) {
        op_b = v.ir.INeg(op_b);
    }

    // Shift the (possibly negated) operand A and add operand B
    const IR::U32 shift_amount{v.ir.Imm32(static_cast<u32>(encoding.scale))};
    const IR::U32 shifted_a{v.ir.ShiftLeftLogical(op_a, shift_amount)};
    IR::U32 sum{v.ir.IAdd(shifted_a, op_b)};
    if (po) {
        // .PO adds one to the final result
        sum = v.ir.IAdd(sum, v.ir.Imm32(1));
    }
    v.X(encoding.dest_reg, sum);

    if (encoding.cc != 0) {
        throw NotImplementedException("ISCADD CC");
    }
}
} // Anonymous namespace
// ISCADD (register form): operand B is the register encoded in the 8 bits starting at bit 20,
// which is exactly what GetReg20 reads.
void TranslatorVisitor::ISCADD_reg(u64 insn) {
    ISCADD(*this, insn, GetReg20(insn));
}
// ISCADD (cbuf) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::ISCADD_cbuf(u64) {
throw NotImplementedException("ISCADD (cbuf)");
}
// ISCADD (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::ISCADD_imm(u64) {
throw NotImplementedException("ISCADD (imm)");
}
// ISCADD32I encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::ISCADD32I(u64) {
throw NotImplementedException("ISCADD32I");
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,99 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Raw encoding of the ISETP comparison operator (decoded from a 3-bit field).
enum class CompareOp : u64 {
    F = 0,  // Always false
    LT = 1, // Less than
    EQ = 2, // Equal
    LE = 3, // Less than or equal
    GT = 4, // Greater than
    NE = 5, // Not equal
    GE = 6, // Greater than or equal
    T = 7,  // Always true
};
/// Boolean operation used to combine the ISETP comparison result with a predicate.
enum class Bop : u64 {
    AND = 0,
    OR = 1,
    XOR = 2,
};
/// Emits the integer comparison `lhs <op> rhs` and returns its boolean result.
/// Ordered comparisons honor is_signed; F and T yield constant predicates.
/// Throws NotImplementedException when op is not a known comparison.
IR::U1 Compare(IR::IREmitter& ir, CompareOp op, const IR::U32& lhs, const IR::U32& rhs,
               bool is_signed) {
    // Constant outcomes do not need to look at the operands
    if (op == CompareOp::F) {
        return ir.Imm1(false);
    }
    if (op == CompareOp::T) {
        return ir.Imm1(true);
    }
    switch (op) {
    // Signedness is irrelevant for (in)equality
    case CompareOp::EQ:
        return ir.IEqual(lhs, rhs);
    case CompareOp::NE:
        return ir.INotEqual(lhs, rhs);
    // Ordered comparisons
    case CompareOp::LT:
        return ir.ILessThan(lhs, rhs, is_signed);
    case CompareOp::LE:
        return ir.ILessThanEqual(lhs, rhs, is_signed);
    case CompareOp::GT:
        return ir.IGreaterThan(lhs, rhs, is_signed);
    case CompareOp::GE:
        return ir.IGreaterThanEqual(lhs, rhs, is_signed);
    default:
        break;
    }
    throw NotImplementedException("Invalid ISETP compare op {}", op);
}
/// Combines a comparison result with a predicate using the requested boolean operation.
/// Throws NotImplementedException when bop is not AND, OR or XOR.
IR::U1 Combine(IR::IREmitter& ir, Bop bop, const IR::U1& comparison, const IR::U1& bop_pred) {
    if (bop == Bop::AND) {
        return ir.LogicalAnd(comparison, bop_pred);
    }
    if (bop == Bop::OR) {
        return ir.LogicalOr(comparison, bop_pred);
    }
    if (bop == Bop::XOR) {
        return ir.LogicalXor(comparison, bop_pred);
    }
    throw NotImplementedException("Invalid ISETP bop {}", bop);
}
/// Emits the IR for an integer compare-and-set-predicate.
///
/// Operand A (register in bits [8, 16)) is compared against op_b. Predicate A receives
/// `comparison <bop> pred` and predicate B receives `!comparison <bop> pred`, where pred is the
/// optionally negated predicate encoded in the instruction.
void ISETP(TranslatorVisitor& v, u64 insn, const IR::U32& op_b) {
    union {
        u64 raw;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<45, 2, Bop> bop;
        BitField<48, 1, u64> is_signed;
        BitField<49, 3, CompareOp> compare_op;
    } const encoding{insn};

    const Bop bop{encoding.bop};
    const bool is_signed{encoding.is_signed != 0};
    const bool negate_pred{encoding.neg_bop_pred != 0};

    // Evaluate the comparison and load the predicate it gets combined with
    const IR::U32 op_a{v.X(encoding.src_reg_a)};
    const IR::U1 comparison{Compare(v.ir, encoding.compare_op, op_a, op_b, is_signed)};
    const IR::U1 combine_pred{v.ir.GetPred(encoding.bop_pred, negate_pred)};

    // Predicate A uses the comparison as is, predicate B uses its logical negation
    const IR::U1 value_a{Combine(v.ir, bop, comparison, combine_pred)};
    const IR::U1 inverted{v.ir.LogicalNot(comparison)};
    const IR::U1 value_b{Combine(v.ir, bop, inverted, combine_pred)};

    v.ir.SetPred(encoding.dest_pred_a, value_a);
    v.ir.SetPred(encoding.dest_pred_b, value_b);
}
} // Anonymous namespace
// ISETP (register form): operand B is fetched with GetReg20.
void TranslatorVisitor::ISETP_reg(u64 insn) {
ISETP(*this, insn, GetReg20(insn));
}
// ISETP (cbuf form): operand B is fetched with GetCbuf.
void TranslatorVisitor::ISETP_cbuf(u64 insn) {
ISETP(*this, insn, GetCbuf(insn));
}
// ISETP (imm) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::ISETP_imm(u64) {
throw NotImplementedException("ISETP_imm");
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,71 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Emits the IR for a logical shift left of the register encoded in bits [8, 16).
///
/// With .W the shift amount wraps (only its low 5 bits are used). Without .W any shift amount
/// of 32 or more produces zero. Throws NotImplementedException for the X and CC modifiers.
void SHL(TranslatorVisitor& v, u64 insn, const IR::U32& unsafe_shift) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<39, 1, u64> w;
        BitField<43, 1, u64> x;
        BitField<47, 1, u64> cc;
    } const shl{insn};

    if (shl.x != 0) {
        throw NotImplementedException("SHL.X");
    }
    if (shl.cc != 0) {
        throw NotImplementedException("SHL.CC");
    }

    const IR::U32 base{v.X(shl.src_reg_a)};
    const IR::U32 result{[&]() -> IR::U32 {
        if (shl.w != 0) {
            // Wrapping mode: mask the shift amount down to the 0-31 range
            const IR::U32 wrapped_shift{v.ir.BitwiseAnd(unsafe_shift, v.ir.Imm32(31))};
            return v.ir.ShiftLeftLogical(base, wrapped_shift);
        }
        // Clamping mode: amounts of 32 and above have to evaluate to zero.
        //
        // The out of bounds shift is still emitted and its value discarded afterwards. This is
        // fine because both backends only leave the *result* undefined, not the behavior:
        //
        // https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#OpShiftLeftLogical
        // "Shift is treated as unsigned. The resulting value is undefined if Shift is greater
        // than or equal to the bit width of the components of Base."
        //
        // https://www.khronos.org/registry/OpenGL/extensions/NV/NV_gpu_program4.txt
        // "The results of a shift operation ("<<") are undefined if the value of the second
        // operand is negative, or greater than or equal to the number of bits in the first
        // operand."
        const IR::U1 in_range{v.ir.ILessThan(unsafe_shift, v.ir.Imm32(32), false)};
        const IR::U32 shifted{v.ir.ShiftLeftLogical(base, unsafe_shift)};
        return v.ir.Select(in_range, shifted, v.ir.Imm32(0));
    }()};
    v.X(shl.dest_reg, result);
}
} // Anonymous namespace
// SHL (register form): not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::SHL_reg(u64) {
throw NotImplementedException("SHL_reg");
}
// SHL (cbuf form): not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::SHL_cbuf(u64) {
throw NotImplementedException("SHL_cbuf");
}
// SHL (immediate form): the shift amount is the immediate returned by GetImm20.
void TranslatorVisitor::SHL_imm(u64 insn) {
SHL(*this, insn, GetImm20(insn));
}
} // namespace Shader::Maxwell

View File

@ -0,0 +1,110 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Raw encoding of the XMAD mode that selects how operand C enters the addition.
enum class SelectMode : u64 {
    Default = 0,
    CLO = 1,
    CHI = 2,
    CSFU = 3,
    CBCC = 4,
};
/// Selects one of the two 16-bit halves of a 32-bit value.
enum class Half : u64 {
    H0 = 0, // Least-significant bits (15:0)
    H1 = 1, // Most-significant bits (31:16)
};
/// Extracts the requested 16-bit half of src through a bit field extract, passing is_signed on.
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
    // The upper half starts at bit 16, the lower half at bit 0
    const int first_bit{half == Half::H1 ? 16 : 0};
    const IR::U32 offset{v.ir.Imm32(first_bit)};
    const IR::U32 count{v.ir.Imm32(16)};
    return v.ir.BitFieldExtract(src, offset, count, is_signed);
}
/// Emits the IR for XMAD, a 16x16-bit multiply followed by a 32-bit add.
///
/// A 16-bit half of operand A (register in bits [8, 16), half chosen by bit 53) is multiplied
/// with the half_b half of src_b; each half is extracted as signed or unsigned according to
/// bits 48/49. The product is optionally shifted left by 16 (psl) and added to an operand
/// derived from src_c according to select_mode. With mrg the low half of src_b replaces the
/// upper half of the result.
/// Throws NotImplementedException for the X and CC modifiers and for the CSFU select mode.
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
          SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_reg_a;
        BitField<47, 1, u64> cc;
        BitField<48, 1, u64> is_a_signed;
        BitField<49, 1, u64> is_b_signed;
        BitField<53, 1, Half> half_a;
    } const xmad{insn};

    if (x) {
        throw NotImplementedException("XMAD X");
    }
    const IR::U32 op_a{ExtractHalf(v, v.X(xmad.src_reg_a), xmad.half_a, xmad.is_a_signed != 0)};
    const IR::U32 op_b{ExtractHalf(v, src_b, half_b, xmad.is_b_signed != 0)};

    IR::U32 product{v.ir.IMul(op_a, op_b)};
    if (psl) {
        // .PSL shifts the product 16 bits
        product = v.ir.ShiftLeftLogical(product, v.ir.Imm32(16));
    }
    const IR::U32 op_c{[&]() -> IR::U32 {
        switch (select_mode) {
        case SelectMode::Default:
            return src_c;
        case SelectMode::CLO:
            return ExtractHalf(v, src_c, Half::H0, false);
        case SelectMode::CHI:
            return ExtractHalf(v, src_c, Half::H1, false);
        case SelectMode::CBCC:
            // .CBCC adds operand B shifted to the upper half on top of operand C
            return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
        case SelectMode::CSFU:
            throw NotImplementedException("XMAD CSFU");
        }
        throw NotImplementedException("Invalid XMAD select mode {}", select_mode);
    }()};
    IR::U32 result{v.ir.IAdd(product, op_c)};
    if (mrg) {
        // .MRG inserts src_b [15:0] into result's [31:16].
        const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
        result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
    }
    if (xmad.cc != 0) {
        throw NotImplementedException("XMAD CC");
    }
    // Store result
    v.X(xmad.dest_reg, result);
}
} // Anonymous namespace
// XMAD (reg) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::XMAD_reg(u64) {
throw NotImplementedException("XMAD (reg)");
}
// XMAD (rc) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::XMAD_rc(u64) {
throw NotImplementedException("XMAD (rc)");
}
// XMAD (cr) encoding: not translated yet; unconditionally throws NotImplementedException.
void TranslatorVisitor::XMAD_cr(u64) {
throw NotImplementedException("XMAD (cr)");
}
/// XMAD (immediate form): operand B is a 16-bit immediate and operand C a register.
/// The lower half (H0) of operand B is always the one that gets multiplied.
void TranslatorVisitor::XMAD_imm(u64 insn) {
    union {
        u64 raw;
        BitField<20, 16, u64> src_b;
        BitField<36, 1, u64> psl;
        BitField<37, 1, u64> mrg;
        BitField<38, 1, u64> x;
        BitField<39, 8, IR::Reg> src_c;
        BitField<50, 3, SelectMode> select_mode;
    } const bits{insn};

    // Fetch the operands: an immediate for B and a register read for C
    const u32 immediate_b{static_cast<u32>(bits.src_b)};
    const IR::U32 src_b{ir.Imm32(immediate_b)};
    const IR::U32 src_c{X(bits.src_c)};

    const bool psl{bits.psl != 0};
    const bool mrg{bits.mrg != 0};
    const bool x{bits.x != 0};
    XMAD(*this, insn, src_b, src_c, bits.select_mode, Half::H0, psl, mrg, x);
}
} // namespace Shader::Maxwell

View File

@ -10,14 +10,33 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
enum class StoreSize : u64 { enum class LoadSize : u64 {
U8, U8, // Zero-extend
S8, S8, // Sign-extend
U16, U16, // Zero-extend
S16, S16, // Sign-extend
B32, B32,
B64, B64,
B128, B128,
U128, // ???
};
/// Raw encoding of the access size of a global memory store (decoded from a 3-bit field).
enum class StoreSize : u64 {
    U8 = 0,  // Zero-extend
    S8 = 1,  // Sign-extend
    U16 = 2, // Zero-extend
    S16 = 3, // Sign-extend
    B32 = 4,
    B64 = 5,
    B128 = 6,
};
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class LoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (cache in L2 and below, not L1)
CI, // ???
CV, // Don't cache and fetch again (consider cached system memory lines stale, fetch again)
}; };
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html // See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
@ -27,61 +46,137 @@ enum class StoreCache : u64 {
CS, // Cache streaming, likely to be accessed once CS, // Cache streaming, likely to be accessed once
WT, // Cache write-through (to system memory) WT, // Cache write-through (to system memory)
}; };
/// Builds the 64-bit global memory address of an LDG/STG instruction.
///
/// The base comes from the address register: zero-extended from 32 bits without .E, or packed
/// from an aligned register pair with .E. The 24-bit offset encoded in the instruction is then
/// added; it is read as unsigned when the address register is RZ and as signed otherwise.
/// Throws NotImplementedException when .E is used with an unaligned address register.
IR::U64 Address(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> addr_reg;
        BitField<20, 24, s64> addr_offset;
        BitField<20, 24, u64> rz_addr_offset;
        BitField<45, 1, u64> e;
    } const mem{insn};

    const IR::U64 base{[&]() -> IR::U64 {
        if (mem.e != 0) {
            // Extended addressing needs an even/odd register pair
            if (!IR::IsAligned(mem.addr_reg, 2)) {
                throw NotImplementedException("Unaligned address register");
            }
            // Pack two registers to build the 64-bit address
            return v.ir.PackUint2x32(
                v.ir.CompositeConstruct(v.X(mem.addr_reg), v.X(mem.addr_reg + 1)));
        }
        // LDG/STG without .E uses a 32-bit pointer, zero-extend it
        return v.ir.ConvertU(64, v.X(mem.addr_reg));
    }()};

    // When RZ is used, the address is an absolute address and the offset is unsigned
    const bool is_absolute{mem.addr_reg == IR::Reg::RZ};
    const u64 offset{is_absolute ? static_cast<u64>(mem.rz_addr_offset.Value())
                                 : static_cast<u64>(mem.addr_offset.Value())};

    return v.ir.IAdd(base, v.ir.Imm64(offset));
}
} // Anonymous namespace } // Anonymous namespace
/// LDG: loads from global memory into one or more consecutive registers.
///
/// Sizes up to 32 bits write a single register. B64 and B128 load a vector and scatter it over
/// 2 or 4 registers, which requires the destination register to be aligned accordingly.
/// Throws NotImplementedException for unaligned destinations and for U128.
void TranslatorVisitor::LDG(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<46, 2, LoadCache> cache;
        BitField<48, 3, LoadSize> size;
    } const ldg{insn};

    // Pointer to load data from
    const IR::U64 address{Address(*this, insn)};
    const IR::Reg dest_reg{ldg.dest_reg};

    // Writes each element of a loaded vector into its own consecutive register
    const auto scatter{[&](const IR::Value& vector, int num_elements) {
        for (int element = 0; element < num_elements; ++element) {
            X(dest_reg + element, ir.CompositeExtract(vector, element));
        }
    }};

    switch (ldg.size) {
    case LoadSize::U8:
        X(dest_reg, ir.LoadGlobalU8(address));
        return;
    case LoadSize::S8:
        X(dest_reg, ir.LoadGlobalS8(address));
        return;
    case LoadSize::U16:
        X(dest_reg, ir.LoadGlobalU16(address));
        return;
    case LoadSize::S16:
        X(dest_reg, ir.LoadGlobalS16(address));
        return;
    case LoadSize::B32:
        X(dest_reg, ir.LoadGlobal32(address));
        return;
    case LoadSize::B64: {
        if (!IR::IsAligned(dest_reg, 2)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal64(address)};
        scatter(vector, 2);
        return;
    }
    case LoadSize::B128: {
        if (!IR::IsAligned(dest_reg, 4)) {
            throw NotImplementedException("Unaligned data registers");
        }
        const IR::Value vector{ir.LoadGlobal128(address)};
        scatter(vector, 4);
        return;
    }
    case LoadSize::U128:
        throw NotImplementedException("LDG U.128");
    default:
        throw NotImplementedException("Invalid LDG size {}", ldg.size.Value());
    }
}
void TranslatorVisitor::STG(u64 insn) { void TranslatorVisitor::STG(u64 insn) {
// STG stores registers into global memory. // STG stores registers into global memory.
union { union {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> data_reg; BitField<0, 8, IR::Reg> data_reg;
BitField<8, 8, IR::Reg> addr_reg;
BitField<45, 1, u64> e;
BitField<46, 2, StoreCache> cache; BitField<46, 2, StoreCache> cache;
BitField<48, 3, StoreSize> size; BitField<48, 3, StoreSize> size;
} const stg{insn}; } const stg{insn};
const IR::U64 address{[&]() -> IR::U64 { // Pointer to store data into
if (stg.e == 0) { const IR::U64 address{Address(*this, insn)};
// STG without .E uses a 32-bit pointer, zero-extend it const IR::Reg data_reg{stg.data_reg};
return ir.ConvertU(64, X(stg.addr_reg));
}
if (!IR::IsAligned(stg.addr_reg, 2)) {
throw NotImplementedException("Unaligned address register");
}
// Pack two registers to build the 32-bit address
return ir.PackUint2x32(ir.CompositeConstruct(X(stg.addr_reg), X(stg.addr_reg + 1)));
}()};
switch (stg.size) { switch (stg.size) {
case StoreSize::U8: case StoreSize::U8:
ir.WriteGlobalU8(address, X(stg.data_reg)); ir.WriteGlobalU8(address, X(data_reg));
break; break;
case StoreSize::S8: case StoreSize::S8:
ir.WriteGlobalS8(address, X(stg.data_reg)); ir.WriteGlobalS8(address, X(data_reg));
break; break;
case StoreSize::U16: case StoreSize::U16:
ir.WriteGlobalU16(address, X(stg.data_reg)); ir.WriteGlobalU16(address, X(data_reg));
break; break;
case StoreSize::S16: case StoreSize::S16:
ir.WriteGlobalS16(address, X(stg.data_reg)); ir.WriteGlobalS16(address, X(data_reg));
break; break;
case StoreSize::B32: case StoreSize::B32:
ir.WriteGlobal32(address, X(stg.data_reg)); ir.WriteGlobal32(address, X(data_reg));
break; break;
case StoreSize::B64: { case StoreSize::B64: {
if (!IR::IsAligned(stg.data_reg, 2)) { if (!IR::IsAligned(data_reg, 2)) {
throw NotImplementedException("Unaligned data registers"); throw NotImplementedException("Unaligned data registers");
} }
const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1))}; const IR::Value vector{ir.CompositeConstruct(X(data_reg), X(data_reg + 1))};
ir.WriteGlobal64(address, vector); ir.WriteGlobal64(address, vector);
break; break;
} }
case StoreSize::B128: case StoreSize::B128:
if (!IR::IsAligned(stg.data_reg, 4)) { if (!IR::IsAligned(data_reg, 4)) {
throw NotImplementedException("Unaligned data registers"); throw NotImplementedException("Unaligned data registers");
} }
const IR::Value vector{ir.CompositeConstruct(X(stg.data_reg), X(stg.data_reg + 1), const IR::Value vector{
X(stg.data_reg + 2), X(stg.data_reg + 3))}; ir.CompositeConstruct(X(data_reg), X(data_reg + 1), X(data_reg + 2), X(data_reg + 3))};
ir.WriteGlobal128(address, vector); ir.WriteGlobal128(address, vector);
break; break;
} }

View File

@ -39,7 +39,7 @@ void TranslatorVisitor::MOV_cbuf(u64 insn) {
void TranslatorVisitor::MOV_imm(u64 insn) { void TranslatorVisitor::MOV_imm(u64 insn) {
const MOV mov{insn}; const MOV mov{insn};
CheckMask(mov); CheckMask(mov);
X(mov.dest_reg, GetImm(insn)); X(mov.dest_reg, GetImm20(insn));
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View File

@ -0,0 +1,114 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Identifiers of the special registers that S2R can read, as encoded in the instruction.
// The numbering is sparse: values without an enumerator are simply not named here.
// NOTE(review): SM_SHADER_TYPE is the only entry without the SR_ prefix - possibly a typo.
enum class SpecialRegister : u64 {
SR_LANEID = 0,
SR_VIRTCFG = 2,
SR_VIRTID = 3,
SR_PM0 = 4,
SR_PM1 = 5,
SR_PM2 = 6,
SR_PM3 = 7,
SR_PM4 = 8,
SR_PM5 = 9,
SR_PM6 = 10,
SR_PM7 = 11,
SR_ORDERING_TICKET = 15,
SR_PRIM_TYPE = 16,
SR_INVOCATION_ID = 17,
SR_Y_DIRECTION = 18,
SR_THREAD_KILL = 19,
SM_SHADER_TYPE = 20,
SR_DIRECTCBEWRITEADDRESSLOW = 21,
SR_DIRECTCBEWRITEADDRESSHIGH = 22,
SR_DIRECTCBEWRITEENABLE = 23,
SR_MACHINE_ID_0 = 24,
SR_MACHINE_ID_1 = 25,
SR_MACHINE_ID_2 = 26,
SR_MACHINE_ID_3 = 27,
SR_AFFINITY = 28,
SR_INVOCATION_INFO = 29,
SR_WSCALEFACTOR_XY = 30,
SR_WSCALEFACTOR_Z = 31,
SR_TID = 32,
SR_TID_X = 33,
SR_TID_Y = 34,
SR_TID_Z = 35,
SR_CTAID_X = 37,
SR_CTAID_Y = 38,
SR_CTAID_Z = 39,
SR_NTID = 49,
SR_CirQueueIncrMinusOne = 50,
SR_NLATC = 51,
SR_SWINLO = 57,
SR_SWINSZ = 58,
SR_SMEMSZ = 59,
SR_SMEMBANKS = 60,
SR_LWINLO = 61,
SR_LWINSZ = 62,
SR_LMEMLOSZ = 63,
SR_LMEMHIOFF = 64,
SR_EQMASK = 65,
SR_LTMASK = 66,
SR_LEMASK = 67,
SR_GTMASK = 68,
SR_GEMASK = 69,
SR_REGALLOC = 70,
SR_GLOBALERRORSTATUS = 73,
SR_WARPERRORSTATUS = 75,
SR_PM_HI0 = 81,
SR_PM_HI1 = 82,
SR_PM_HI2 = 83,
SR_PM_HI3 = 84,
SR_PM_HI4 = 85,
SR_PM_HI5 = 86,
SR_PM_HI6 = 87,
SR_PM_HI7 = 88,
SR_CLOCKLO = 89,
SR_CLOCKHI = 90,
SR_GLOBALTIMERLO = 91,
SR_GLOBALTIMERHI = 92,
SR_HWTASKID = 105,
SR_CIRCULARQUEUEENTRYINDEX = 106,
SR_CIRCULARQUEUEENTRYADDRESSLOW = 107,
SR_CIRCULARQUEUEENTRYADDRESSHIGH = 108,
};
/// Emits the IR that reads a special register.
/// Only the thread id (TID) and block id (CTAID) components are supported; every other
/// register raises NotImplementedException.
[[nodiscard]] IR::U32 Read(IR::IREmitter& ir, SpecialRegister special_register) {
    switch (special_register) {
    // CTAID.* maps to the workgroup id
    case SpecialRegister::SR_CTAID_X:
        return ir.WorkgroupIdX();
    case SpecialRegister::SR_CTAID_Y:
        return ir.WorkgroupIdY();
    case SpecialRegister::SR_CTAID_Z:
        return ir.WorkgroupIdZ();
    // TID.* maps to the local invocation id
    case SpecialRegister::SR_TID_X:
        return ir.LocalInvocationIdX();
    case SpecialRegister::SR_TID_Y:
        return ir.LocalInvocationIdY();
    case SpecialRegister::SR_TID_Z:
        return ir.LocalInvocationIdZ();
    default:
        throw NotImplementedException("S2R special register {}", special_register);
    }
}
} // Anonymous namespace
/// S2R: copies the special register encoded in the 8 bits starting at bit 20 into the
/// destination register encoded in bits [0, 8).
void TranslatorVisitor::S2R(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<20, 8, SpecialRegister> src_reg;
    } const encoding{insn};
    const IR::U32 value{Read(ir, encoding.src_reg)};
    X(encoding.dest_reg, value);
}
} // namespace Shader::Maxwell

View File

@ -7,21 +7,8 @@
#include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/opcode.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Maxwell { namespace Shader::Maxwell {
// Debug helper: runs the get/set elimination, dead code elimination and identity removal
// passes on the block (mutating it) and prints the resulting dump to stderr.
// The pre-optimization dump stored in `raw` is computed but never printed.
[[maybe_unused]] static inline void DumpOptimized(IR::Block& block) {
auto raw{IR::DumpBlock(block)};
Optimization::GetSetElimination(block);
Optimization::DeadCodeEliminationPass(block);
Optimization::IdentityRemovalPass(block);
auto dumped{IR::DumpBlock(block)};
fmt::print(stderr, "{}", dumped);
}
[[noreturn]] static void ThrowNotImplemented(Opcode opcode) { [[noreturn]] static void ThrowNotImplemented(Opcode opcode) {
throw NotImplementedException("Instruction {} is not implemented", opcode); throw NotImplementedException("Instruction {} is not implemented", opcode);
} }
@ -146,8 +133,8 @@ void TranslatorVisitor::DADD_imm(u64) {
ThrowNotImplemented(Opcode::DADD_imm); ThrowNotImplemented(Opcode::DADD_imm);
} }
void TranslatorVisitor::DEPBAR(u64) { void TranslatorVisitor::DEPBAR() {
ThrowNotImplemented(Opcode::DEPBAR); // DEPBAR is a no-op
} }
void TranslatorVisitor::DFMA_reg(u64) { void TranslatorVisitor::DFMA_reg(u64) {
@ -230,22 +217,6 @@ void TranslatorVisitor::F2F_imm(u64) {
ThrowNotImplemented(Opcode::F2F_imm); ThrowNotImplemented(Opcode::F2F_imm);
} }
// Placeholder: reports Opcode::FADD_reg as not implemented.
void TranslatorVisitor::FADD_reg(u64) {
ThrowNotImplemented(Opcode::FADD_reg);
}
// Placeholder: reports Opcode::FADD_cbuf as not implemented.
void TranslatorVisitor::FADD_cbuf(u64) {
ThrowNotImplemented(Opcode::FADD_cbuf);
}
// Placeholder: reports Opcode::FADD_imm as not implemented.
void TranslatorVisitor::FADD_imm(u64) {
ThrowNotImplemented(Opcode::FADD_imm);
}
// Placeholder: reports Opcode::FADD32I as not implemented.
void TranslatorVisitor::FADD32I(u64) {
ThrowNotImplemented(Opcode::FADD32I);
}
void TranslatorVisitor::FCHK_reg(u64) { void TranslatorVisitor::FCHK_reg(u64) {
ThrowNotImplemented(Opcode::FCHK_reg); ThrowNotImplemented(Opcode::FCHK_reg);
} }
@ -274,26 +245,6 @@ void TranslatorVisitor::FCMP_imm(u64) {
ThrowNotImplemented(Opcode::FCMP_imm); ThrowNotImplemented(Opcode::FCMP_imm);
} }
// Placeholder: reports Opcode::FFMA_reg as not implemented.
void TranslatorVisitor::FFMA_reg(u64) {
ThrowNotImplemented(Opcode::FFMA_reg);
}
// Placeholder: reports Opcode::FFMA_rc as not implemented.
void TranslatorVisitor::FFMA_rc(u64) {
ThrowNotImplemented(Opcode::FFMA_rc);
}
// Placeholder: reports Opcode::FFMA_cr as not implemented.
void TranslatorVisitor::FFMA_cr(u64) {
ThrowNotImplemented(Opcode::FFMA_cr);
}
// Placeholder: reports Opcode::FFMA_imm as not implemented.
void TranslatorVisitor::FFMA_imm(u64) {
ThrowNotImplemented(Opcode::FFMA_imm);
}
// Placeholder: reports Opcode::FFMA32I as not implemented.
void TranslatorVisitor::FFMA32I(u64) {
ThrowNotImplemented(Opcode::FFMA32I);
}
void TranslatorVisitor::FLO_reg(u64) { void TranslatorVisitor::FLO_reg(u64) {
ThrowNotImplemented(Opcode::FLO_reg); ThrowNotImplemented(Opcode::FLO_reg);
} }
@ -318,22 +269,6 @@ void TranslatorVisitor::FMNMX_imm(u64) {
ThrowNotImplemented(Opcode::FMNMX_imm); ThrowNotImplemented(Opcode::FMNMX_imm);
} }
// Placeholder handlers for the FMUL_reg, FMUL_cbuf, FMUL_imm and FMUL32I opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::FMUL_reg(u64) {
ThrowNotImplemented(Opcode::FMUL_reg);
}
void TranslatorVisitor::FMUL_cbuf(u64) {
ThrowNotImplemented(Opcode::FMUL_cbuf);
}
void TranslatorVisitor::FMUL_imm(u64) {
ThrowNotImplemented(Opcode::FMUL_imm);
}
void TranslatorVisitor::FMUL32I(u64) {
ThrowNotImplemented(Opcode::FMUL32I);
}
void TranslatorVisitor::FSET_reg(u64) { void TranslatorVisitor::FSET_reg(u64) {
ThrowNotImplemented(Opcode::FSET_reg); ThrowNotImplemented(Opcode::FSET_reg);
} }
@ -470,18 +405,6 @@ void TranslatorVisitor::I2I_imm(u64) {
ThrowNotImplemented(Opcode::I2I_imm); ThrowNotImplemented(Opcode::I2I_imm);
} }
// Placeholder handlers for the IADD_reg, IADD_cbuf and IADD_imm opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::IADD_reg(u64) {
ThrowNotImplemented(Opcode::IADD_reg);
}
void TranslatorVisitor::IADD_cbuf(u64) {
ThrowNotImplemented(Opcode::IADD_cbuf);
}
void TranslatorVisitor::IADD_imm(u64) {
ThrowNotImplemented(Opcode::IADD_imm);
}
void TranslatorVisitor::IADD3_reg(u64) { void TranslatorVisitor::IADD3_reg(u64) {
ThrowNotImplemented(Opcode::IADD3_reg); ThrowNotImplemented(Opcode::IADD3_reg);
} }
@ -494,10 +417,6 @@ void TranslatorVisitor::IADD3_imm(u64) {
ThrowNotImplemented(Opcode::IADD3_imm); ThrowNotImplemented(Opcode::IADD3_imm);
} }
// Placeholder handler for the IADD32I opcode: ignores its u64 argument and
// unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::IADD32I(u64) {
ThrowNotImplemented(Opcode::IADD32I);
}
void TranslatorVisitor::ICMP_reg(u64) { void TranslatorVisitor::ICMP_reg(u64) {
ThrowNotImplemented(Opcode::ICMP_reg); ThrowNotImplemented(Opcode::ICMP_reg);
} }
@ -594,22 +513,6 @@ void TranslatorVisitor::ISBERD(u64) {
ThrowNotImplemented(Opcode::ISBERD); ThrowNotImplemented(Opcode::ISBERD);
} }
// Placeholder handlers for the ISCADD_reg, ISCADD_cbuf, ISCADD_imm and ISCADD32I opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::ISCADD_reg(u64) {
ThrowNotImplemented(Opcode::ISCADD_reg);
}
void TranslatorVisitor::ISCADD_cbuf(u64) {
ThrowNotImplemented(Opcode::ISCADD_cbuf);
}
void TranslatorVisitor::ISCADD_imm(u64) {
ThrowNotImplemented(Opcode::ISCADD_imm);
}
void TranslatorVisitor::ISCADD32I(u64) {
ThrowNotImplemented(Opcode::ISCADD32I);
}
void TranslatorVisitor::ISET_reg(u64) { void TranslatorVisitor::ISET_reg(u64) {
ThrowNotImplemented(Opcode::ISET_reg); ThrowNotImplemented(Opcode::ISET_reg);
} }
@ -622,18 +525,6 @@ void TranslatorVisitor::ISET_imm(u64) {
ThrowNotImplemented(Opcode::ISET_imm); ThrowNotImplemented(Opcode::ISET_imm);
} }
// Placeholder handlers for the ISETP_reg, ISETP_cbuf and ISETP_imm opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::ISETP_reg(u64) {
ThrowNotImplemented(Opcode::ISETP_reg);
}
void TranslatorVisitor::ISETP_cbuf(u64) {
ThrowNotImplemented(Opcode::ISETP_cbuf);
}
void TranslatorVisitor::ISETP_imm(u64) {
ThrowNotImplemented(Opcode::ISETP_imm);
}
void TranslatorVisitor::JCAL(u64) { void TranslatorVisitor::JCAL(u64) {
ThrowNotImplemented(Opcode::JCAL); ThrowNotImplemented(Opcode::JCAL);
} }
@ -658,10 +549,6 @@ void TranslatorVisitor::LDC(u64) {
ThrowNotImplemented(Opcode::LDC); ThrowNotImplemented(Opcode::LDC);
} }
// Placeholder handler for the LDG opcode: ignores its u64 argument and
// unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::LDG(u64) {
ThrowNotImplemented(Opcode::LDG);
}
void TranslatorVisitor::LDL(u64) { void TranslatorVisitor::LDL(u64) {
ThrowNotImplemented(Opcode::LDL); ThrowNotImplemented(Opcode::LDL);
} }
@ -866,10 +753,6 @@ void TranslatorVisitor::RTT(u64) {
ThrowNotImplemented(Opcode::RTT); ThrowNotImplemented(Opcode::RTT);
} }
// Placeholder handler for the S2R opcode: ignores its u64 argument and
// unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::S2R(u64) {
ThrowNotImplemented(Opcode::S2R);
}
void TranslatorVisitor::SAM(u64) { void TranslatorVisitor::SAM(u64) {
ThrowNotImplemented(Opcode::SAM); ThrowNotImplemented(Opcode::SAM);
} }
@ -914,18 +797,6 @@ void TranslatorVisitor::SHFL(u64) {
ThrowNotImplemented(Opcode::SHFL); ThrowNotImplemented(Opcode::SHFL);
} }
// Placeholder handlers for the SHL_reg, SHL_cbuf and SHL_imm opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::SHL_reg(u64) {
ThrowNotImplemented(Opcode::SHL_reg);
}
void TranslatorVisitor::SHL_cbuf(u64) {
ThrowNotImplemented(Opcode::SHL_cbuf);
}
void TranslatorVisitor::SHL_imm(u64) {
ThrowNotImplemented(Opcode::SHL_imm);
}
void TranslatorVisitor::SHR_reg(u64) { void TranslatorVisitor::SHR_reg(u64) {
ThrowNotImplemented(Opcode::SHR_reg); ThrowNotImplemented(Opcode::SHR_reg);
} }
@ -1086,20 +957,4 @@ void TranslatorVisitor::VSHR(u64) {
ThrowNotImplemented(Opcode::VSHR); ThrowNotImplemented(Opcode::VSHR);
} }
// Placeholder handlers for the XMAD_reg, XMAD_rc, XMAD_cr and XMAD_imm opcodes.
// Each one ignores its u64 argument and unconditionally throws through ThrowNotImplemented.
void TranslatorVisitor::XMAD_reg(u64) {
ThrowNotImplemented(Opcode::XMAD_reg);
}
void TranslatorVisitor::XMAD_rc(u64) {
ThrowNotImplemented(Opcode::XMAD_rc);
}
void TranslatorVisitor::XMAD_cr(u64) {
ThrowNotImplemented(Opcode::XMAD_cr);
}
void TranslatorVisitor::XMAD_imm(u64) {
ThrowNotImplemented(Opcode::XMAD_imm);
}
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View File

@ -1,87 +0,0 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/microinstruction.h"
#include "shader_recompiler/ir_opt/passes.h"
namespace Shader::Optimization {
namespace {
// Shorthand for a position inside an IR block; dereferencing it yields the instruction there.
using Iterator = IR::Block::iterator;
// Category of state a RegisterInfo entry was recorded for. Only general registers are
// tracked by this pass, so there is a single enumerator.
enum class TrackingType { Reg };
// Bookkeeping kept for one register while a block is being scanned.
struct RegisterInfo {
    // Value the register is currently known to hold: either the operand of the latest set
    // or the first get instruction seen. DoGet treats an IsEmpty() value as "nothing known".
    IR::Value register_value;
    // Kind of access register_value was recorded for. Given an explicit default so that a
    // default-initialized entry never carries an indeterminate value.
    TrackingType tracking_type{TrackingType::Reg};
    // Most recent set instruction for this register; only meaningful while
    // set_instruction_present is true.
    Iterator last_set_instruction;
    // True when last_set_instruction refers to a set that a later set is allowed to erase.
    bool set_instruction_present = false;
};
// Records a write to a register.
//
// If an earlier write to the same register is still being tracked, that instruction is
// invalidated and erased from the block, because the new write replaces it. Afterwards the
// entry remembers the written value, the tracking type and the position of this write.
void DoSet(IR::Block& block, RegisterInfo& info, IR::Value value, Iterator set_inst,
           TrackingType tracking_type) {
    const bool has_previous_set{info.set_instruction_present};
    if (has_previous_set) {
        // Drop the superseded write before the entry is overwritten below.
        info.last_set_instruction->Invalidate();
        block.Instructions().erase(info.last_set_instruction);
    }

    info.last_set_instruction = set_inst;
    info.set_instruction_present = true;
    info.tracking_type = tracking_type;
    info.register_value = value;
}
// Builds a fresh entry for a register about which nothing but a read is known.
//
// The read instruction itself becomes the tracked value; no set instruction is recorded,
// so set_instruction_present keeps its default of false.
RegisterInfo Nothing(Iterator get_inst, TrackingType tracking_type) {
    RegisterInfo fresh{};
    fresh.tracking_type = tracking_type;
    fresh.register_value = IR::Value{&*get_inst};
    return fresh;
}
// Handles a read of a register.
//
// When a value is already tracked for the same tracking type, every use of the read is
// redirected to that value. Otherwise (nothing tracked yet, or tracked under a different
// type) the entry is reset so that this read becomes the tracked value.
void DoGet(RegisterInfo& info, Iterator get_inst, TrackingType tracking_type) {
    const bool has_value{!info.register_value.IsEmpty()};
    // tracking_type is only inspected once a value is known to be present.
    if (has_value && info.tracking_type == tracking_type) {
        get_inst->ReplaceUsesWith(info.register_value);
        return;
    }
    info = Nothing(get_inst, tracking_type);
}
} // Anonymous namespace
// Walks a block once and removes redundant register traffic.
//
// Reads of a register whose value is already known are redirected to that value, and a
// write that is followed by another write to the same register is erased. Accesses to
// IR::Reg::RZ are ignored. Instructions other than GetRegister/SetRegister are skipped.
void GetSetElimination(IR::Block& block) {
    // One entry per register index; RZ is filtered out before indexing.
    std::array<RegisterInfo, 255> tracked;

    Iterator it = block.begin();
    while (it != block.end()) {
        const auto opcode{it->Opcode()};
        const bool is_get{opcode == IR::Opcode::GetRegister};
        const bool is_set{opcode == IR::Opcode::SetRegister};
        if (is_get || is_set) {
            const IR::Reg reg{it->Arg(0).Reg()};
            if (reg != IR::Reg::RZ) {
                RegisterInfo& slot{tracked.at(static_cast<size_t>(reg))};
                if (is_get) {
                    DoGet(slot, it, TrackingType::Reg);
                } else {
                    // DoSet may erase an earlier instruction, never the current one.
                    DoSet(block, slot, it->Arg(1), it, TrackingType::Reg);
                }
            }
        }
        ++it;
    }
}
} // namespace Shader::Optimization

View File

@ -17,7 +17,6 @@ void Invoke(Func&& func, IR::Function& function) {
} }
void DeadCodeEliminationPass(IR::Block& block); void DeadCodeEliminationPass(IR::Block& block);
void GetSetElimination(IR::Block& block);
void IdentityRemovalPass(IR::Block& block); void IdentityRemovalPass(IR::Block& block);
void SsaRewritePass(IR::Function& function); void SsaRewritePass(IR::Function& function);
void VerificationPass(const IR::Block& block); void VerificationPass(const IR::Block& block);

View File

@ -51,7 +51,8 @@ void RunDatabase() {
int main() { int main() {
// RunDatabase(); // RunDatabase();
FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"};
auto cfg{std::make_unique<Flow::CFG>(env, 0)}; auto cfg{std::make_unique<Flow::CFG>(env, 0)};
// fmt::print(stdout, "{}\n", cfg->Dot()); // fmt::print(stdout, "{}\n", cfg->Dot());