yuzu-emu
/
yuzu-android
Archived
1
0
Fork 0

shader: Refactor PTP and other minor changes

This commit is contained in:
ReinUsesLisp 2021-03-26 16:46:07 -03:00 committed by ameerj
parent b5db38f50e
commit d9c5bd9509
14 changed files with 67 additions and 123 deletions

View File

@ -169,7 +169,6 @@ void EmitContext::DefineCommonTypes(const Info& info) {
AddCapability(spv::Capability::Float64);
F64.Define(*this, TypeFloat(64), "f64");
}
array_U32x2 = Name(TypeArray(U32[2], Constant(U32[1], 4U)), "array-u32x2");
}
void EmitContext::DefineCommonConstants() {
@ -352,21 +351,20 @@ void EmitContext::DefineOutputs(const Info& info) {
}
}
if (stage == Stage::Fragment) {
for (size_t i = 0; i < 8; ++i) {
if (!info.stores_frag_color[i]) {
for (u32 index = 0; index < 8; ++index) {
if (!info.stores_frag_color[index]) {
continue;
}
frag_color[i] = DefineOutput(*this, F32[4]);
Decorate(frag_color[i], spv::Decoration::Location, static_cast<u32>(i));
Name(frag_color[i], fmt::format("frag_color{}", i));
}
if (!info.stores_frag_depth) {
return;
frag_color[index] = DefineOutput(*this, F32[4]);
Decorate(frag_color[index], spv::Decoration::Location, index);
Name(frag_color[index], fmt::format("frag_color{}", index));
}
if (info.stores_frag_depth) {
frag_depth = DefineOutput(*this, F32[1]);
Decorate(frag_depth, spv::Decoration::BuiltIn, static_cast<u32>(spv::BuiltIn::FragDepth));
Decorate(frag_depth, spv::Decoration::BuiltIn, spv::BuiltIn::FragDepth);
Name(frag_depth, "frag_depth");
}
}
}
} // namespace Shader::Backend::SPIRV

View File

@ -65,7 +65,6 @@ public:
VectorTypes U32;
VectorTypes F16;
VectorTypes F64;
Id array_U32x2;
Id true_value{};
Id false_value{};

View File

@ -95,7 +95,7 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
Id value);
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
Id value);
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
@ -104,7 +104,7 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
@ -113,7 +113,7 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
@ -122,7 +122,6 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4);
void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx);
@ -359,10 +358,10 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id coords, Id dref, Id bias_lc, Id offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod_lc, Id offset);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id offset2);
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2);
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id offset, Id offset2, Id dref);
const IR::Value& offset, const IR::Value& offset2, Id dref);
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms);
Id EmitVoteAll(EmitContext& ctx, Id pred);

View File

@ -7,11 +7,7 @@
namespace Shader::Backend::SPIRV {
Id EmitCompositeConstructU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
const auto info{inst->Flags<IR::CompositeDecoration>()};
if (info.is_constant) {
return ctx.ConstantComposite(ctx.U32[2], e1, e2);
}
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.U32[2], e1, e2);
}
@ -47,12 +43,7 @@ Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}
Id EmitCompositeConstructF16x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
const auto info{inst->Flags<IR::CompositeDecoration>()};
if (info.is_constant) {
return ctx.ConstantComposite(ctx.F16[2], e1, e2);
}
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F16[2], e1, e2);
}
@ -88,11 +79,7 @@ Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}
Id EmitCompositeConstructF32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2) {
const auto info{inst->Flags<IR::CompositeDecoration>()};
if (info.is_constant) {
return ctx.ConstantComposite(ctx.F32[2], e1, e2);
}
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
return ctx.OpCompositeConstruct(ctx.F32[2], e1, e2);
}
@ -164,15 +151,4 @@ Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index
return ctx.OpCompositeInsert(ctx.F64[4], object, composite, index);
}
/// Emits the composite built from four elements using the context's `array_U32x2` type.
///
/// @param ctx  Emit context providing the array type, the profile and the SPIR-V emitters.
/// @param inst IR instruction whose flags carry the CompositeDecoration for this composite.
/// @param e1..e4 Ids of the four array elements, in order.
/// @return The id of a constant composite when the decoration marks the instruction as
///         constant, the id of an OpCompositeConstruct when the profile reports
///         `support_variadic_ptp`, and a default-constructed Id otherwise.
Id EmitCompositeConstructArrayU32x2(EmitContext& ctx, IR::Inst* inst, Id e1, Id e2, Id e3, Id e4) {
    const bool is_constant{inst->Flags<IR::CompositeDecoration>().is_constant};
    if (!is_constant && !ctx.profile.support_variadic_ptp) {
        // Neither emission path applies, so nothing is emitted and an empty Id is returned.
        return {};
    }
    if (is_constant) {
        // The constant path takes priority regardless of the profile setting.
        return ctx.ConstantComposite(ctx.array_U32x2, e1, e2, e3, e4);
    }
    return ctx.OpCompositeConstruct(ctx.array_U32x2, e1, e2, e3, e4);
}
} // namespace Shader::Backend::SPIRV

View File

@ -30,16 +30,34 @@ public:
}
}
explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id offset2) {
if (Sirit::ValidId(offset)) {
Add(spv::ImageOperandsMask::Offset, offset);
explicit ImageOperands(EmitContext& ctx, const IR::Value& offset, const IR::Value& offset2) {
if (offset2.IsEmpty()) {
if (offset.IsEmpty()) {
return;
}
if (Sirit::ValidId(offset2)) {
Add(spv::ImageOperandsMask::ConstOffsets, offset2);
Add(spv::ImageOperandsMask::Offset, ctx.Def(offset));
return;
}
const std::array values{offset.InstRecursive(), offset2.InstRecursive()};
if (!values[0]->AreAllArgsImmediates() || !values[1]->AreAllArgsImmediates()) {
throw NotImplementedException("Not all arguments in PTP are immediate");
}
const IR::Opcode opcode{values[0]->Opcode()};
if (opcode != values[1]->Opcode() || opcode != IR::Opcode::CompositeConstructU32x4) {
throw LogicError("Invalid PTP arguments");
}
auto read{[&](int a, int b) { return ctx.Constant(ctx.U32[1], values[a]->Arg(b).U32()); }};
const Id offsets{
ctx.ConstantComposite(ctx.TypeArray(ctx.U32[2], ctx.Constant(ctx.U32[1], 4)),
ctx.ConstantComposite(ctx.U32[2], read(0, 0), read(0, 1)),
ctx.ConstantComposite(ctx.U32[2], read(0, 2), read(0, 3)),
ctx.ConstantComposite(ctx.U32[2], read(1, 0), read(1, 1)),
ctx.ConstantComposite(ctx.U32[2], read(1, 2), read(1, 3)))};
Add(spv::ImageOperandsMask::ConstOffsets, offsets);
}
explicit ImageOperands([[maybe_unused]] EmitContext& ctx, Id offset, Id lod, Id ms) {
explicit ImageOperands(Id offset, Id lod, Id ms) {
if (Sirit::ValidId(lod)) {
Add(spv::ImageOperandsMask::Lod, lod);
}
@ -197,8 +215,8 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
}
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id offset2) {
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
const IR::Value& offset, const IR::Value& offset2) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const ImageOperands operands(ctx, offset, offset2);
return Emit(&EmitContext::OpImageSparseGather, &EmitContext::OpImageGather, ctx, inst,
@ -208,7 +226,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id
}
Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
Id offset, Id offset2, Id dref) {
const IR::Value& offset, const IR::Value& offset2, Id dref) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const ImageOperands operands(ctx, offset, offset2);
return Emit(&EmitContext::OpImageSparseDrefGather, &EmitContext::OpImageDrefGather, ctx, inst,
@ -218,7 +236,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id offset,
Id lod, Id ms) {
const auto info{inst->Flags<IR::TextureInstInfo>()};
const ImageOperands operands(ctx, offset, lod, ms);
const ImageOperands operands(offset, lod, ms);
return Emit(&EmitContext::OpImageSparseFetch, &EmitContext::OpImageFetch, ctx, inst, ctx.F32[4],
Texture(ctx, index), coords, operands.Mask(), operands.Span());
}

View File

@ -398,16 +398,15 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
if (e1.Type() != e2.Type()) {
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
}
CompositeDecoration decor{};
switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x2, Flags{decor}, e1, e2);
return Inst(Opcode::CompositeConstructU32x2, e1, e2);
case Type::F16:
return Inst(Opcode::CompositeConstructF16x2, Flags{decor}, e1, e2);
return Inst(Opcode::CompositeConstructF16x2, e1, e2);
case Type::F32:
return Inst(Opcode::CompositeConstructF32x2, Flags{decor}, e1, e2);
return Inst(Opcode::CompositeConstructF32x2, e1, e2);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x2, Flags{decor}, e1, e2);
return Inst(Opcode::CompositeConstructF64x2, e1, e2);
default:
ThrowInvalidType(e1.Type());
}
@ -437,7 +436,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
e3.Type(), e4.Type());
}
CompositeDecoration decor{};
switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
@ -447,8 +445,6 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
case Type::U32x2:
return Inst(Opcode::CompositeConstructArrayU32x2, Flags{decor}, e1, e2, e3, e4);
default:
ThrowInvalidType(e1.Type());
}

View File

@ -101,8 +101,8 @@ public:
template <typename FlagsType>
requires(sizeof(FlagsType) <= sizeof(u32) && std::is_trivially_copyable_v<FlagsType>)
[[nodiscard]] void SetFlags(FlagsType& new_val) noexcept {
std::memcpy(&flags, &new_val, sizeof(new_val));
[[nodiscard]] void SetFlags(FlagsType value) noexcept {
std::memcpy(&flags, &value, sizeof(value));
}
/// Intrusively store the host definition of this instruction.

View File

@ -32,11 +32,6 @@ struct FpControl {
};
static_assert(sizeof(FpControl) <= sizeof(u32));
/// Per-instruction flags stored on composite-construct instructions.
struct CompositeDecoration {
    /// True once the instruction has been marked as a constant composite; defaults to false.
    bool is_constant = false;
};
// The decoration must not be larger than a u32.
static_assert(sizeof(u32) >= sizeof(CompositeDecoration));
union TextureInstInfo {
u32 raw;
BitField<0, 8, TextureType> type;

View File

@ -126,7 +126,6 @@ OPCODE(CompositeExtractF64x4, F64, F64x
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
OPCODE(CompositeConstructArrayU32x2, Opaque, U32x2, U32x2, U32x2, U32x2, )
// Select operations
OPCODE(SelectU1, U1, U1, U1, U1, )

View File

@ -44,20 +44,6 @@ bool Value::IsEmpty() const noexcept {
return type == Type::Void;
}
/// Reports whether this value is constant all the way down.
///
/// An immediate is constant by definition. Otherwise every argument of the backing
/// instruction is checked recursively, and the first non-constant argument makes the
/// whole value non-constant.
///
/// @return true when the value is an immediate or all arguments are constant containers.
bool Value::IsConstantContainer() const {
    if (!IsImmediate()) {
        // NOTE(review): presumably rejects values that are not instruction-backed — confirm
        // against ValidateAccess before relying on it.
        ValidateAccess(Type::Opaque);
        const auto arg_count = inst->NumArgs();
        for (size_t index = 0; index < arg_count; ++index) {
            const bool arg_is_constant{inst->Arg(index).IsConstantContainer()};
            if (!arg_is_constant) {
                return false;
            }
        }
    }
    return true;
}
bool Value::IsImmediate() const noexcept {
if (IsIdentity()) {
return inst->Arg(0).IsImmediate();

View File

@ -38,7 +38,6 @@ public:
[[nodiscard]] bool IsImmediate() const noexcept;
[[nodiscard]] bool IsLabel() const noexcept;
[[nodiscard]] IR::Type Type() const noexcept;
[[nodiscard]] bool IsConstantContainer() const;
[[nodiscard]] IR::Inst* Inst() const;
[[nodiscard]] IR::Block* Label() const;

View File

@ -106,17 +106,17 @@ IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
throw NotImplementedException("Invalid texture type {}", type);
}
IR::Value MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
std::pair<IR::Value, IR::Value> MakeOffsetPTP(TranslatorVisitor& v, IR::Reg& reg) {
const IR::U32 value1{v.X(reg++)};
const IR::U32 value2{v.X(reg++)};
const IR::U32 bitsize = v.ir.Imm32(6);
const auto getVector = ([&v, &bitsize](const IR::U32& value, u32 base) {
return v.ir.CompositeConstruct(
v.ir.BitFieldExtract(value, v.ir.Imm32(base + 0), bitsize, true),
v.ir.BitFieldExtract(value, v.ir.Imm32(base + 8), bitsize, true));
});
return v.ir.CompositeConstruct(getVector(value1, 0), getVector(value1, 16),
getVector(value2, 0), getVector(value2, 16));
const IR::U32 bitsize{v.ir.Imm32(6)};
const auto make_vector{[&v, &bitsize](const IR::U32& value) {
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), bitsize, true),
v.ir.BitFieldExtract(value, v.ir.Imm32(8), bitsize, true),
v.ir.BitFieldExtract(value, v.ir.Imm32(16), bitsize, true),
v.ir.BitFieldExtract(value, v.ir.Imm32(24), bitsize, true));
}};
return {make_vector(value1), make_vector(value2)};
}
void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetType offset_type,
@ -150,14 +150,12 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy
switch (offset_type) {
case OffsetType::None:
break;
case OffsetType::AOFFI: {
case OffsetType::AOFFI:
offset = MakeOffset(v, meta_reg, tld4.type);
break;
}
case OffsetType::PTP: {
offset2 = MakeOffsetPTP(v, meta_reg);
case OffsetType::PTP:
std::tie(offset, offset2) = MakeOffsetPTP(v, meta_reg);
break;
}
default:
throw NotImplementedException("Invalid offset type {}", offset_type);
}
@ -167,7 +165,7 @@ void Impl(TranslatorVisitor& v, u64 insn, ComponentType component_type, OffsetTy
IR::TextureInstInfo info{};
info.type.Assign(GetType(tld4.type, tld4.dc != 0));
info.gather_component.Assign(static_cast<u32>(component_type));
const IR::Value sample{[&]() -> IR::Value {
const IR::Value sample{[&] {
if (tld4.dc == 0) {
return v.ir.ImageGather(handle, coords, offset, offset2, info);
}

View File

@ -355,17 +355,6 @@ void FoldBranchConditional(IR::Inst& inst) {
}
}
/// Marks a composite-construct instruction as constant when its leading arguments are all
/// constant containers.
///
/// @param inst   Instruction to inspect and, if eligible, to flag.
/// @param amount Number of leading arguments to check (defaults to 2).
void FoldConstantComposite(IR::Inst& inst, size_t amount = 2) {
    size_t arg_index{0};
    while (arg_index < amount) {
        const bool constant_arg{inst.Arg(arg_index).IsConstantContainer()};
        if (!constant_arg) {
            // One non-constant element is enough to leave the instruction untouched.
            return;
        }
        ++arg_index;
    }
    // Read-modify-write of the flags so the decoration is stored with is_constant set.
    auto decoration = inst.Flags<IR::CompositeDecoration>();
    decoration.is_constant = true;
    inst.SetFlags(decoration);
}
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
switch (inst.Opcode()) {
case IR::Opcode::GetRegister:
@ -391,13 +380,6 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
case IR::Opcode::SelectF32:
case IR::Opcode::SelectF64:
return FoldSelect(inst);
case IR::Opcode::CompositeConstructU32x2:
case IR::Opcode::CompositeConstructF16x2:
case IR::Opcode::CompositeConstructF32x2:
case IR::Opcode::CompositeConstructF64x2:
return FoldConstantComposite(inst, 2);
case IR::Opcode::CompositeConstructArrayU32x2:
return FoldConstantComposite(inst, 4);
case IR::Opcode::FPMul32:
return FoldFPMul32(inst);
case IR::Opcode::LogicalAnd:
@ -423,12 +405,12 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
return;
case IR::Opcode::BitFieldSExtract:
FoldWhenAllImmediates(inst, [](s32 base, u32 shift, u32 count) {
const size_t back_shift = static_cast<size_t>(shift) + static_cast<size_t>(count);
const size_t back_shift{static_cast<size_t>(shift) + static_cast<size_t>(count)};
if (back_shift > Common::BitSize<s32>()) {
throw LogicError("Undefined result in {}({}, {}, {})", IR::Opcode::BitFieldSExtract,
base, shift, count);
}
const size_t left_shift = Common::BitSize<s32>() - back_shift;
const size_t left_shift{Common::BitSize<s32>() - back_shift};
return static_cast<u32>(static_cast<s32>(base << left_shift) >>
static_cast<size_t>(Common::BitSize<s32>() - count));
});

View File

@ -30,7 +30,6 @@ struct Profile {
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
bool support_vote{};
bool support_variadic_ptp{};
bool warp_size_potentially_larger_than_guest{};
// FClamp is broken and OpFMax + OpFMin should be used instead