Merge pull request #3287 from ReinUsesLisp/ldg-stg-16
shader_ir/memory: Implement u16 and u8 for STG and LDG
This commit is contained in:
commit
55f95e7f26
|
@ -6,6 +6,7 @@
|
|||
#include <vector>
|
||||
#include <fmt/format.h>
|
||||
|
||||
#include "common/alignment.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/logging/log.h"
|
||||
|
@ -22,34 +23,39 @@ using Tegra::Shader::Register;
|
|||
|
||||
namespace {
|
||||
|
||||
u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
|
||||
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
|
||||
return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
|
||||
uniform_type == Tegra::Shader::UniformType::UnsignedShort;
|
||||
}
|
||||
|
||||
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
|
||||
switch (uniform_type) {
|
||||
case Tegra::Shader::UniformType::UnsignedByte:
|
||||
case Tegra::Shader::UniformType::Single:
|
||||
return 1;
|
||||
case Tegra::Shader::UniformType::Double:
|
||||
return 2;
|
||||
case Tegra::Shader::UniformType::Quad:
|
||||
case Tegra::Shader::UniformType::UnsignedQuad:
|
||||
return 4;
|
||||
return 0b11;
|
||||
case Tegra::Shader::UniformType::UnsignedShort:
|
||||
return 0b10;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
||||
return 1;
|
||||
UNREACHABLE();
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
|
||||
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
|
||||
switch (uniform_type) {
|
||||
case Tegra::Shader::UniformType::UnsignedByte:
|
||||
return 8;
|
||||
case Tegra::Shader::UniformType::UnsignedShort:
|
||||
return 16;
|
||||
case Tegra::Shader::UniformType::Single:
|
||||
return 1;
|
||||
return 32;
|
||||
case Tegra::Shader::UniformType::Double:
|
||||
return 2;
|
||||
return 64;
|
||||
case Tegra::Shader::UniformType::Quad:
|
||||
case Tegra::Shader::UniformType::UnsignedQuad:
|
||||
return 4;
|
||||
return 128;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
||||
return 1;
|
||||
return 32;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
}();
|
||||
|
||||
const auto [real_address_base, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, false);
|
||||
TrackGlobalMemory(bb, instr, true, false);
|
||||
|
||||
const u32 count = GetLdgMemorySize(type);
|
||||
const u32 size = GetMemorySize(type);
|
||||
const u32 count = Common::AlignUp(size, 32) / 32;
|
||||
if (!real_address_base || !base_address) {
|
||||
// Tracking failed, load zeroes.
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
|
@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
|
||||
if (type == Tegra::Shader::UniformType::UnsignedByte) {
|
||||
// To handle unaligned loads get the byte used to dereferenced global memory
|
||||
// and extract that byte from the loaded uint32.
|
||||
Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
|
||||
byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
|
||||
// To handle unaligned loads get the bytes used to dereference global memory and extract
|
||||
// those bytes from the loaded u32.
|
||||
if (IsUnaligned(type)) {
|
||||
Node mask = Immediate(GetUnalignedMask(type));
|
||||
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
|
||||
|
||||
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
|
||||
Immediate(8));
|
||||
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
|
||||
std::move(offset), Immediate(size));
|
||||
}
|
||||
|
||||
SetTemporary(bb, i, gmem);
|
||||
|
@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
}
|
||||
}();
|
||||
|
||||
// For unaligned reads we have to read memory too.
|
||||
const bool is_read = IsUnaligned(type);
|
||||
const auto [real_address_base, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, true);
|
||||
TrackGlobalMemory(bb, instr, is_read, true);
|
||||
if (!real_address_base || !base_address) {
|
||||
// Tracking failed, skip the store.
|
||||
break;
|
||||
}
|
||||
|
||||
const u32 count = GetStgMemorySize(type);
|
||||
const u32 size = GetMemorySize(type);
|
||||
const u32 count = Common::AlignUp(size, 32) / 32;
|
||||
for (u32 i = 0; i < count; ++i) {
|
||||
const Node it_offset = Immediate(i * 4);
|
||||
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
||||
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
const Node value = GetRegister(instr.gpr0.Value() + i);
|
||||
Node value = GetRegister(instr.gpr0.Value() + i);
|
||||
|
||||
if (IsUnaligned(type)) {
|
||||
Node mask = Immediate(GetUnalignedMask(type));
|
||||
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
|
||||
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
|
||||
|
||||
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
|
||||
Immediate(size));
|
||||
}
|
||||
|
||||
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
||||
}
|
||||
break;
|
||||
|
@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
|
||||
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
|
||||
Instruction instr,
|
||||
bool is_write) {
|
||||
bool is_read, bool is_write) {
|
||||
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
||||
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
||||
|
||||
|
@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
|
|||
const GlobalMemoryBase descriptor{index, offset};
|
||||
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
|
||||
auto& usage = entry->second;
|
||||
if (is_write) {
|
||||
usage.is_written = true;
|
||||
} else {
|
||||
usage.is_read = true;
|
||||
}
|
||||
usage.is_written |= is_write;
|
||||
usage.is_read |= is_read;
|
||||
|
||||
const auto real_address =
|
||||
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
|
||||
|
|
|
@ -394,7 +394,7 @@ private:
|
|||
|
||||
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
|
||||
Tegra::Shader::Instruction instr,
|
||||
bool is_write);
|
||||
bool is_read, bool is_write);
|
||||
|
||||
/// Register new amending code and obtain the reference id.
|
||||
std::size_t DeclareAmend(Node new_amend);
|
||||
|
|
Reference in New Issue