yuzu-emu
/
yuzu
Archived
1
0
Fork 0

shader: Reimplement GetCbufU64 as GetCbufU32x2

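Returning the 64-bit constant buffer load as a two-component U32 vector (U32x2) instead of a U64 may generate better code on some compilers, and it is easier to handle: callers no longer need to unpack the result.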
This commit is contained in:
ReinUsesLisp 2021-04-04 02:31:09 -03:00 committed by ameerj
parent 5b3c6d59c2
commit 3f594dd86b
9 changed files with 21 additions and 22 deletions

View File

@ -308,8 +308,8 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
if (True(info.used_constant_buffer_types & IR::Type::F32)) {
DefineConstantBuffers(info, &UniformDefinitions::F32, binding, F32[1], 'f', sizeof(f32));
}
if (True(info.used_constant_buffer_types & IR::Type::U64)) {
DefineConstantBuffers(info, &UniformDefinitions::U64, binding, U64, 'u', sizeof(u64));
if (True(info.used_constant_buffer_types & IR::Type::U32x2)) {
DefineConstantBuffers(info, &UniformDefinitions::U32x2, binding, U32[2], 'u', sizeof(u64));
}
for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
binding += desc.count;

View File

@ -42,7 +42,7 @@ struct UniformDefinitions {
Id S16{};
Id U32{};
Id F32{};
Id U64{};
Id U32x2{};
};
class EmitContext final : public Sirit::Module {

View File

@ -47,7 +47,7 @@ Id EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
Id EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufU32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset);
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr);
void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, Id value);
void EmitGetAttributeIndexed(EmitContext& ctx);

View File

@ -153,8 +153,8 @@ Id EmitGetCbufF32(EmitContext& ctx, const IR::Value& binding, const IR::Value& o
return GetCbuf(ctx, ctx.F32[1], &UniformDefinitions::F32, sizeof(f32), binding, offset);
}
Id EmitGetCbufU64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
return GetCbuf(ctx, ctx.U64, &UniformDefinitions::U64, sizeof(u64), binding, offset);
Id EmitGetCbufU32x2(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset) {
return GetCbuf(ctx, ctx.U32[2], &UniformDefinitions::U32x2, sizeof(u32[2]), binding, offset);
}
Id EmitGetAttribute(EmitContext& ctx, IR::Attribute attr) {

View File

@ -162,7 +162,7 @@ U32 IREmitter::GetCbuf(const U32& binding, const U32& byte_offset) {
return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
}
UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
Value IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
bool is_signed) {
switch (bitsize) {
case 8:
@ -172,7 +172,7 @@ UAny IREmitter::GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsi
case 32:
return Inst<U32>(Opcode::GetCbufU32, binding, byte_offset);
case 64:
return Inst<U64>(Opcode::GetCbufU64, binding, byte_offset);
return Inst(Opcode::GetCbufU32x2, binding, byte_offset);
default:
throw InvalidArgument("Invalid bit size {}", bitsize);
}

View File

@ -56,7 +56,7 @@ public:
void SetIndirectBranchVariable(const U32& value);
[[nodiscard]] U32 GetCbuf(const U32& binding, const U32& byte_offset);
[[nodiscard]] UAny GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
[[nodiscard]] Value GetCbuf(const U32& binding, const U32& byte_offset, size_t bitsize,
bool is_signed);
[[nodiscard]] F32 GetFloatCbuf(const U32& binding, const U32& byte_offset);

View File

@ -40,7 +40,7 @@ OPCODE(GetCbufU16, U32, U32,
OPCODE(GetCbufS16, U32, U32, U32, )
OPCODE(GetCbufU32, U32, U32, U32, )
OPCODE(GetCbufF32, F32, U32, U32, )
OPCODE(GetCbufU64, U64, U32, U32, )
OPCODE(GetCbufU32x2, U32x2, U32, U32, )
OPCODE(GetAttribute, F32, Attribute, )
OPCODE(SetAttribute, Void, Attribute, F32, )
OPCODE(GetAttributeIndexed, F32, U32, )

View File

@ -30,25 +30,25 @@ void TranslatorVisitor::LDC(u64 insn) {
const auto [index, offset]{Slot(ir, ldc.mode, imm_index, reg, imm)};
switch (ldc.size) {
case Size::U8:
X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, false));
X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, false)});
break;
case Size::S8:
X(ldc.dest_reg, ir.GetCbuf(index, offset, 8, true));
X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 8, true)});
break;
case Size::U16:
X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, false));
X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, false)});
break;
case Size::S16:
X(ldc.dest_reg, ir.GetCbuf(index, offset, 16, true));
X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 16, true)});
break;
case Size::B32:
X(ldc.dest_reg, ir.GetCbuf(index, offset, 32, false));
X(ldc.dest_reg, IR::U32{ir.GetCbuf(index, offset, 32, false)});
break;
case Size::B64: {
if (!IR::IsAligned(ldc.dest_reg, 2)) {
throw NotImplementedException("Unaligned destination register");
}
const IR::Value vector{ir.UnpackUint2x32(ir.GetCbuf(index, offset, 64, false))};
const IR::Value vector{ir.GetCbuf(index, offset, 64, false)};
for (int i = 0; i < 2; ++i) {
X(ldc.dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
}

View File

@ -271,7 +271,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
break;
}
switch (inst.Opcode()) {
case IR::Opcode::GetCbufU64:
case IR::Opcode::UndefU64:
case IR::Opcode::LoadGlobalU8:
case IR::Opcode::LoadGlobalS8:
@ -349,7 +348,7 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GetCbufS16:
case IR::Opcode::GetCbufU32:
case IR::Opcode::GetCbufF32:
case IR::Opcode::GetCbufU64: {
case IR::Opcode::GetCbufU32x2: {
if (const IR::Value index{inst.Arg(0)}; index.IsImmediate()) {
AddConstantBufferDescriptor(info, index.U32(), 1);
} else {
@ -370,8 +369,8 @@ void VisitUsages(Info& info, IR::Inst& inst) {
case IR::Opcode::GetCbufF32:
info.used_constant_buffer_types |= IR::Type::F32;
break;
case IR::Opcode::GetCbufU64:
info.used_constant_buffer_types |= IR::Type::U64;
case IR::Opcode::GetCbufU32x2:
info.used_constant_buffer_types |= IR::Type::U32x2;
break;
default:
break;