shader_decode: implement Bits64 for SULD.D and revert the ShaderIR constructor to remove the shader stage parameter.
This commit is contained in:
parent
730f9b55b3
commit
2906372ba1
|
@ -234,7 +234,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
const std::size_t size_in_bytes = code.size() * sizeof(u64);
|
||||||
|
|
||||||
auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
|
auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
|
||||||
const ShaderIR ir(code, shader_type, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||||
// TODO(Rodrigo): Handle VertexA shaders
|
// TODO(Rodrigo): Handle VertexA shaders
|
||||||
// std::optional<ShaderIR> ir_b;
|
// std::optional<ShaderIR> ir_b;
|
||||||
// if (!code_b.empty()) {
|
// if (!code_b.empty()) {
|
||||||
|
@ -264,7 +264,7 @@ Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, Prog
|
||||||
|
|
||||||
auto& engine = params.system.GPU().KeplerCompute();
|
auto& engine = params.system.GPU().KeplerCompute();
|
||||||
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
|
auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
|
||||||
const ShaderIR ir(code, ShaderType::Compute, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
|
||||||
const u64 uid = params.unique_identifier;
|
const u64 uid = params.unique_identifier;
|
||||||
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
|
auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
|
||||||
|
|
||||||
|
@ -341,7 +341,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
const bool is_compute = entry.type == ShaderType::Compute;
|
const bool is_compute = entry.type == ShaderType::Compute;
|
||||||
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
|
const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
|
||||||
auto registry = MakeRegistry(entry);
|
auto registry = MakeRegistry(entry);
|
||||||
const ShaderIR ir(entry.code, entry.type, main_offset, COMPILER_SETTINGS, *registry);
|
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
|
||||||
|
|
||||||
std::shared_ptr<OGLProgram> program;
|
std::shared_ptr<OGLProgram> program;
|
||||||
if (precompiled_entry) {
|
if (precompiled_entry) {
|
||||||
|
|
|
@ -162,7 +162,7 @@ CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stag
|
||||||
ProgramCode program_code, u32 main_offset)
|
ProgramCode program_code, u32 main_offset)
|
||||||
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
|
: RasterizerCacheObject{host_ptr}, gpu_addr{gpu_addr}, cpu_addr{cpu_addr},
|
||||||
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
|
program_code{std::move(program_code)}, registry{stage, GetEngine(system, stage)},
|
||||||
shader_ir{this->program_code, stage, main_offset, compiler_settings, registry},
|
shader_ir{this->program_code, main_offset, compiler_settings, registry},
|
||||||
entries{GenerateShaderEntries(shader_ir)} {}
|
entries{GenerateShaderEntries(shader_ir)} {}
|
||||||
|
|
||||||
CachedShader::~CachedShader() = default;
|
CachedShader::~CachedShader() = default;
|
||||||
|
|
|
@ -271,6 +271,40 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
|
||||||
}
|
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
/// Builds the node holding the integer representation of one loaded image component.
///
/// @param component_type  Type of the component being converted.
/// @param component_size  Size of the component in bits; used to scale normalized values and
///                        as the width of the SNORM bitfield extract.
/// @param original_value  Node holding the component as produced by the image load.
/// @param is_signed       Output flag, must not be null. Written for every integer and
///                        normalized type; left untouched for FLOAT and unknown types.
/// @return The converted node, or original_value when no conversion is applied.
Node ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                 const Node original_value, bool* is_signed) {
    switch (component_type) {
    case ComponentType::SNORM: {
        *is_signed = true;
        // range [-1.0, 1.0]
        // The power of two is computed in 64 bits so that component sizes of 31 or 32 bits do
        // not overflow a signed int shift.
        const float scale = static_cast<float>(1ULL << component_size) / 2.f - 1.f;
        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
        // Dereference the flag: handing over the pointer itself would convert to `true`
        // regardless of the stored value.
        cnv_value = SignedOperation(OperationCode::ICastFloat, *is_signed, std::move(cnv_value));
        return BitfieldExtract(std::move(cnv_value), 0, component_size);
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        *is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        const float scale = static_cast<float>(1ULL << component_size) - 1.f;
        auto cnv_value = Operation(OperationCode::FMul, original_value, Immediate(scale));
        // Pass the flag's value (not the pointer) so UNORM really selects the unsigned cast.
        return SignedOperation(OperationCode::ICastFloat, *is_signed, std::move(cnv_value));
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        *is_signed = false;
        return original_value;
    case ComponentType::FLOAT:
        // 16-bit float components are wrapped in an HCastFloat operation; every other size is
        // forwarded unchanged.
        if (component_size == 16) {
            return Operation(OperationCode::HCastFloat, original_value);
        } else {
            return original_value;
        }
    default:
        UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
        return original_value;
    }
}
|
||||||
|
|
||||||
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||||
const Instruction instr = {program_code[pc]};
|
const Instruction instr = {program_code[pc]};
|
||||||
const auto opcode = OpCode::Decode(instr);
|
const auto opcode = OpCode::Decode(instr);
|
||||||
|
@ -309,7 +343,8 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||||
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||||
}
|
}
|
||||||
} else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
|
} else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
|
||||||
UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32);
|
UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
|
||||||
|
instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
|
||||||
|
|
||||||
auto descriptor = [this, instr] {
|
auto descriptor = [this, instr] {
|
||||||
std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
|
std::optional<Tegra::Engines::SamplerDescriptor> descriptor;
|
||||||
|
@ -333,7 +368,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||||
switch (instr.suldst.GetStoreDataLayout()) {
|
switch (instr.suldst.GetStoreDataLayout()) {
|
||||||
case StoreType::Bits32: {
|
case StoreType::Bits32: {
|
||||||
u32 shifted_counter = 0;
|
u32 shifted_counter = 0;
|
||||||
// value should be RGBA format
|
|
||||||
Node value = Immediate(0);
|
Node value = Immediate(0);
|
||||||
for (u32 element = 0; element < 4; ++element) {
|
for (u32 element = 0; element < 4; ++element) {
|
||||||
if (!IsComponentEnabled(comp_mask, element)) {
|
if (!IsComponentEnabled(comp_mask, element)) {
|
||||||
|
@ -343,39 +377,12 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||||
const auto component_size = GetComponentSize(descriptor.format, element);
|
const auto component_size = GetComponentSize(descriptor.format, element);
|
||||||
bool is_signed = true;
|
bool is_signed = true;
|
||||||
MetaImage meta{image, {}, element};
|
MetaImage meta{image, {}, element};
|
||||||
const Node original_value =
|
|
||||||
Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
|
|
||||||
|
|
||||||
Node converted_value = [&] {
|
Node converted_value = GetComponentValue(
|
||||||
switch (component_type) {
|
component_type, component_size,
|
||||||
case ComponentType::SNORM: {
|
Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)),
|
||||||
is_signed = true;
|
&is_signed);
|
||||||
// range [-1.0, 1.0]
|
|
||||||
auto cnv_value =
|
|
||||||
Operation(OperationCode::FMul, original_value, Immediate(127.f));
|
|
||||||
cnv_value = SignedOperation(OperationCode::ICastFloat, is_signed,
|
|
||||||
std::move(cnv_value));
|
|
||||||
return BitfieldExtract(std::move(cnv_value), 0, 8);
|
|
||||||
}
|
|
||||||
case ComponentType::SINT:
|
|
||||||
case ComponentType::UNORM: {
|
|
||||||
is_signed = false;
|
|
||||||
// range [0.0, 1.0]
|
|
||||||
auto cnv_value =
|
|
||||||
Operation(OperationCode::FMul, original_value, Immediate(255.f));
|
|
||||||
return SignedOperation(OperationCode::ICastFloat, is_signed,
|
|
||||||
std::move(cnv_value));
|
|
||||||
}
|
|
||||||
case ComponentType::UINT: // range [0, 255]
|
|
||||||
is_signed = false;
|
|
||||||
return original_value;
|
|
||||||
case ComponentType::FLOAT:
|
|
||||||
return original_value;
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("Unimplement component type={}", component_type);
|
|
||||||
return original_value;
|
|
||||||
}
|
|
||||||
}();
|
|
||||||
// shift element to correct position
|
// shift element to correct position
|
||||||
const auto shifted = shifted_counter;
|
const auto shifted = shifted_counter;
|
||||||
if (shifted > 0) {
|
if (shifted > 0) {
|
||||||
|
@ -391,6 +398,56 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
|
||||||
SetRegister(bb, instr.gpr0.Value(), std::move(value));
|
SetRegister(bb, instr.gpr0.Value(), std::move(value));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case StoreType::Bits64: {
|
||||||
|
u32 indexer = 0;
|
||||||
|
u32 shifted_counter = 0;
|
||||||
|
Node value = Immediate(0);
|
||||||
|
for (u32 element = 0; element < 4; ++element) {
|
||||||
|
if (!IsComponentEnabled(comp_mask, element)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
const auto component_type = GetComponentType(descriptor, element);
|
||||||
|
const auto component_size = GetComponentSize(descriptor.format, element);
|
||||||
|
|
||||||
|
bool is_signed = true;
|
||||||
|
MetaImage meta{image, {}, element};
|
||||||
|
|
||||||
|
Node converted_value = GetComponentValue(
|
||||||
|
component_type, component_size,
|
||||||
|
Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)),
|
||||||
|
&is_signed);
|
||||||
|
|
||||||
|
// shift element to correct position
|
||||||
|
const auto shifted = shifted_counter;
|
||||||
|
if (shifted > 0) {
|
||||||
|
converted_value =
|
||||||
|
SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
|
||||||
|
std::move(converted_value), Immediate(shifted));
|
||||||
|
}
|
||||||
|
shifted_counter += component_size;
|
||||||
|
|
||||||
|
// add value into result
|
||||||
|
value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
|
||||||
|
|
||||||
|
// once a full 32-bit word has been packed -> save it into a temporary
|
||||||
|
if (shifted_counter >= 32) {
|
||||||
|
SetTemporary(bb, indexer++, std::move(value));
|
||||||
|
|
||||||
|
// Bits64 only uses two 32-bit words
|
||||||
|
if (indexer >= 2) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// reset counter and value to prepare packing the next 32-bit word
|
||||||
|
value = Immediate(0);
|
||||||
|
shifted_counter = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (u32 i = 0; i < indexer; ++i) {
|
||||||
|
SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -24,10 +24,9 @@ using Tegra::Shader::PredCondition;
|
||||||
using Tegra::Shader::PredOperation;
|
using Tegra::Shader::PredOperation;
|
||||||
using Tegra::Shader::Register;
|
using Tegra::Shader::Register;
|
||||||
|
|
||||||
ShaderIR::ShaderIR(const ProgramCode& program_code, Tegra::Engines::ShaderType shader_stage,
|
ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
|
||||||
u32 main_offset, CompilerSettings settings, Registry& registry)
|
Registry& registry)
|
||||||
: program_code{program_code}, shader_stage{shader_stage},
|
: program_code{program_code}, main_offset{main_offset}, settings{settings}, registry{registry} {
|
||||||
main_offset{main_offset}, settings{settings}, registry{registry} {
|
|
||||||
Decode();
|
Decode();
|
||||||
PostDecode();
|
PostDecode();
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,8 +68,8 @@ struct GlobalMemoryUsage {
|
||||||
|
|
||||||
class ShaderIR final {
|
class ShaderIR final {
|
||||||
public:
|
public:
|
||||||
explicit ShaderIR(const ProgramCode& program_code, Tegra::Engines::ShaderType shader_stage,
|
explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
|
||||||
u32 main_offset, CompilerSettings settings, Registry& registry);
|
Registry& registry);
|
||||||
~ShaderIR();
|
~ShaderIR();
|
||||||
|
|
||||||
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
const std::map<u32, NodeBlock>& GetBasicBlocks() const {
|
||||||
|
@ -312,6 +312,10 @@ private:
|
||||||
/// Conditionally saturates a half float pair
|
/// Conditionally saturates a half float pair
|
||||||
Node GetSaturatedHalfFloat(Node value, bool saturate = true);
|
Node GetSaturatedHalfFloat(Node value, bool saturate = true);
|
||||||
|
|
||||||
|
/// Get image component value by type and size
|
||||||
|
Node GetComponentValue(Tegra::Texture::ComponentType component_type, u32 component_size,
|
||||||
|
const Node original_value, bool* is_signed);
|
||||||
|
|
||||||
/// Returns a predicate comparing two floats
|
/// Returns a predicate comparing two floats
|
||||||
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
|
Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
|
||||||
/// Returns a predicate comparing two integers
|
/// Returns a predicate comparing two integers
|
||||||
|
@ -419,7 +423,6 @@ private:
|
||||||
u32 NewCustomVariable();
|
u32 NewCustomVariable();
|
||||||
|
|
||||||
const ProgramCode& program_code;
|
const ProgramCode& program_code;
|
||||||
const Tegra::Engines::ShaderType shader_stage;
|
|
||||||
const u32 main_offset;
|
const u32 main_offset;
|
||||||
const CompilerSettings settings;
|
const CompilerSettings settings;
|
||||||
Registry& registry;
|
Registry& registry;
|
||||||
|
|
Reference in New Issue