shader_ir: Unify blocks in decompiled shaders.
This commit is contained in:
parent
926b80102f
commit
d5533b440c
|
@ -191,11 +191,13 @@ public:
|
||||||
|
|
||||||
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
|
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
|
||||||
// unlikely that shaders will use 20 nested SSYs and PBKs.
|
// unlikely that shaders will use 20 nested SSYs and PBKs.
|
||||||
|
if (!ir.IsFlowStackDisabled()) {
|
||||||
constexpr u32 FLOW_STACK_SIZE = 20;
|
constexpr u32 FLOW_STACK_SIZE = 20;
|
||||||
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
|
for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
|
||||||
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
|
code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
|
||||||
code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
|
code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
code.AddLine("while (true) {{");
|
code.AddLine("while (true) {{");
|
||||||
++code.scope;
|
++code.scope;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
|
|
||||||
#include <list>
|
#include <list>
|
||||||
|
#include <map>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -104,28 +105,6 @@ struct BlockInfo {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Stamp {
|
|
||||||
Stamp() = default;
|
|
||||||
Stamp(u32 address, u32 target) : address{address}, target{target} {}
|
|
||||||
u32 address{};
|
|
||||||
u32 target{};
|
|
||||||
bool operator==(const Stamp& sb) const {
|
|
||||||
return std::tie(address, target) == std::tie(sb.address, sb.target);
|
|
||||||
}
|
|
||||||
bool operator<(const Stamp& sb) const {
|
|
||||||
return address < sb.address;
|
|
||||||
}
|
|
||||||
bool operator>(const Stamp& sb) const {
|
|
||||||
return address > sb.address;
|
|
||||||
}
|
|
||||||
bool operator<=(const Stamp& sb) const {
|
|
||||||
return address <= sb.address;
|
|
||||||
}
|
|
||||||
bool operator>=(const Stamp& sb) const {
|
|
||||||
return address >= sb.address;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct CFGRebuildState {
|
struct CFGRebuildState {
|
||||||
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
|
explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size)
|
||||||
: program_code{program_code}, program_size{program_size} {
|
: program_code{program_code}, program_size{program_size} {
|
||||||
|
@ -144,8 +123,8 @@ struct CFGRebuildState {
|
||||||
std::list<Query> queries{};
|
std::list<Query> queries{};
|
||||||
std::unordered_map<u32, u32> registered{};
|
std::unordered_map<u32, u32> registered{};
|
||||||
std::unordered_set<u32> labels{};
|
std::unordered_set<u32> labels{};
|
||||||
std::set<Stamp> ssy_labels;
|
std::map<u32, u32> ssy_labels;
|
||||||
std::set<Stamp> pbk_labels;
|
std::map<u32, u32> pbk_labels;
|
||||||
std::unordered_map<u32, BlockStack> stacks{};
|
std::unordered_map<u32, BlockStack> stacks{};
|
||||||
const ProgramCode& program_code;
|
const ProgramCode& program_code;
|
||||||
const std::size_t program_size;
|
const std::size_t program_size;
|
||||||
|
@ -393,7 +372,7 @@ bool TryInspectAddress(CFGRebuildState& state) {
|
||||||
}
|
}
|
||||||
case BlockCollision::Inside: {
|
case BlockCollision::Inside: {
|
||||||
// This case is the tricky one:
|
// This case is the tricky one:
|
||||||
// We need to Split the block in 2 sepprate blocks
|
// We need to split the block into 2 separate blocks
|
||||||
auto it = search_result.second;
|
auto it = search_result.second;
|
||||||
block_info = CreateBlockInfo(state, address, it->end);
|
block_info = CreateBlockInfo(state, address, it->end);
|
||||||
it->end = address - 1;
|
it->end = address - 1;
|
||||||
|
@ -428,13 +407,11 @@ bool TryInspectAddress(CFGRebuildState& state) {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TryQuery(CFGRebuildState& state) {
|
bool TryQuery(CFGRebuildState& state) {
|
||||||
auto gather_labels = ([](ControlStack& cc, std::set<Stamp> labels, BlockInfo& block) {
|
auto gather_labels = ([](ControlStack& cc, std::map<u32, u32>& labels, BlockInfo& block) {
|
||||||
Stamp start{block.start, 0};
|
auto gather_start = labels.lower_bound(block.start);
|
||||||
Stamp end{block.end, 0};
|
auto gather_end = labels.upper_bound(block.end);
|
||||||
auto gather_start = labels.lower_bound(start);
|
|
||||||
auto gather_end = labels.upper_bound(end);
|
|
||||||
while (gather_start != gather_end) {
|
while (gather_start != gather_end) {
|
||||||
cc.Push(gather_start->target);
|
cc.Push(gather_start->second);
|
||||||
gather_start++;
|
gather_start++;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
@ -444,9 +421,13 @@ bool TryQuery(CFGRebuildState& state) {
|
||||||
Query& q = state.queries.front();
|
Query& q = state.queries.front();
|
||||||
u32 block_index = state.registered[q.address];
|
u32 block_index = state.registered[q.address];
|
||||||
BlockInfo& block = state.block_info[block_index];
|
BlockInfo& block = state.block_info[block_index];
|
||||||
|
// If the block is visited, check if the stacks match, else gather the ssy/pbk
|
||||||
|
// labels into the current stack and look if the branch at the end of the block
|
||||||
|
// consumes a label. Schedule new queries accordingly.
|
||||||
if (block.visited) {
|
if (block.visited) {
|
||||||
BlockStack& stack = state.stacks[q.address];
|
BlockStack& stack = state.stacks[q.address];
|
||||||
bool all_okay = q.ssy_stack.Compare(stack.ssy_stack) && q.pbk_stack.Compare(stack.pbk_stack);
|
bool all_okay = (stack.ssy_stack.Size() == 0 || q.ssy_stack.Compare(stack.ssy_stack)) &&
|
||||||
|
(stack.pbk_stack.Size() == 0 || q.pbk_stack.Compare(stack.pbk_stack));
|
||||||
state.queries.pop_front();
|
state.queries.pop_front();
|
||||||
return all_okay;
|
return all_okay;
|
||||||
}
|
}
|
||||||
|
@ -523,8 +504,10 @@ bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_addre
|
||||||
result_out.blocks.push_back(new_block);
|
result_out.blocks.push_back(new_block);
|
||||||
}
|
}
|
||||||
if (result_out.decompilable) {
|
if (result_out.decompilable) {
|
||||||
|
result_out.labels = std::move(state.labels);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// If it's not decompilable, merge the unlabelled blocks together
|
||||||
auto back = result_out.blocks.begin();
|
auto back = result_out.blocks.begin();
|
||||||
auto next = std::next(back);
|
auto next = std::next(back);
|
||||||
while (next != result_out.blocks.end()) {
|
while (next != result_out.blocks.end()) {
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <vector>
|
#include <unordered_set>
|
||||||
|
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
@ -48,6 +48,7 @@ struct ShaderCharacteristics {
|
||||||
bool decompilable{};
|
bool decompilable{};
|
||||||
u32 start;
|
u32 start;
|
||||||
u32 end;
|
u32 end;
|
||||||
|
std::unordered_set<u32> labels{};
|
||||||
};
|
};
|
||||||
|
|
||||||
bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
|
bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address,
|
||||||
|
|
|
@ -38,32 +38,47 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
|
||||||
void ShaderIR::Decode() {
|
void ShaderIR::Decode() {
|
||||||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||||
|
|
||||||
|
disable_flow_stack = false;
|
||||||
ShaderCharacteristics shader_info{};
|
ShaderCharacteristics shader_info{};
|
||||||
bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
|
bool can_proceed = ScanFlow(program_code, program_code.size(), main_offset, shader_info);
|
||||||
if (can_proceed) {
|
if (can_proceed) {
|
||||||
coverage_begin = shader_info.start;
|
coverage_begin = shader_info.start;
|
||||||
coverage_end = shader_info.end;
|
coverage_end = shader_info.end;
|
||||||
if (shader_info.decompilable) {
|
if (shader_info.decompilable) {
|
||||||
std::list<ShaderBlock>& blocks = shader_info.blocks;
|
disable_flow_stack = true;
|
||||||
for (auto& block : blocks) {
|
auto insert_block = ([this](NodeBlock& nodes, u32 label) {
|
||||||
NodeBlock nodes;
|
if (label == exit_branch) {
|
||||||
if (!block.ignore_branch) {
|
|
||||||
nodes = DecodeRange(block.start, block.end);
|
|
||||||
InsertControlFlow(nodes, block);
|
|
||||||
} else {
|
|
||||||
nodes = DecodeRange(block.start, block.end + 1);
|
|
||||||
}
|
|
||||||
basic_blocks.insert({block.start, nodes});
|
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
basic_blocks.insert({label, nodes});
|
||||||
|
});
|
||||||
|
std::list<ShaderBlock>& blocks = shader_info.blocks;
|
||||||
|
NodeBlock current_block;
|
||||||
|
u32 current_label = exit_branch;
|
||||||
|
for (auto& block : blocks) {
|
||||||
|
if (shader_info.labels.count(block.start) != 0) {
|
||||||
|
insert_block(current_block, current_label);
|
||||||
|
current_block.clear();
|
||||||
|
current_label = block.start;
|
||||||
|
}
|
||||||
|
if (!block.ignore_branch) {
|
||||||
|
DecodeRangeInner(current_block, block.start, block.end);
|
||||||
|
InsertControlFlow(current_block, block);
|
||||||
|
} else {
|
||||||
|
DecodeRangeInner(current_block, block.start, block.end + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
insert_block(current_block, current_label);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
|
||||||
// we can't decompile it, fallback to standard method
|
// we can't decompile it, fallback to standard method
|
||||||
for (const auto& block : shader_info.blocks) {
|
for (const auto& block : shader_info.blocks) {
|
||||||
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
LOG_WARNING(HW_GPU, "Flow Analysis failed, falling back to brute force compiling");
|
LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
|
||||||
|
|
||||||
// Now we need to deal with an undecompilable shader. We need to brute force
|
// Now we need to deal with an undecompilable shader. We need to brute force
|
||||||
// a shader that captures every position.
|
// a shader that captures every position.
|
||||||
|
@ -78,12 +93,16 @@ void ShaderIR::Decode() {
|
||||||
|
|
||||||
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
|
||||||
NodeBlock basic_block;
|
NodeBlock basic_block;
|
||||||
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
|
DecodeRangeInner(basic_block, begin, end);
|
||||||
pc = DecodeInstr(basic_block, pc);
|
|
||||||
}
|
|
||||||
return basic_block;
|
return basic_block;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
|
||||||
|
for (u32 pc = begin; pc < (begin > end ? MAX_PROGRAM_LENGTH : end);) {
|
||||||
|
pc = DecodeInstr(bb, pc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
|
void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
|
||||||
auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
|
auto apply_conditions = ([&](const Condition& cond, Node n) -> Node {
|
||||||
Node result = n;
|
Node result = n;
|
||||||
|
|
|
@ -98,9 +98,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
} else {
|
} else {
|
||||||
const u32 target = pc + 1;
|
const u32 target = pc + 1;
|
||||||
const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
|
const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
|
||||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
|
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||||
true, PRECISE, op_a, Immediate(3));
|
PRECISE, op_a, Immediate(3));
|
||||||
const Node operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
const Node operand =
|
||||||
|
Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||||
branch = Operation(OperationCode::BranchIndirect, convert);
|
branch = Operation(OperationCode::BranchIndirect, convert);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -119,14 +120,14 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
const Node index = GetRegister(instr.gpr8);
|
const Node index = GetRegister(instr.gpr8);
|
||||||
const Node op_a =
|
const Node op_a =
|
||||||
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
|
GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
|
||||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
|
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||||
true, PRECISE, op_a, Immediate(3));
|
PRECISE, op_a, Immediate(3));
|
||||||
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||||
} else {
|
} else {
|
||||||
const s32 target = pc + instr.brx.GetBranchExtend();
|
const s32 target = pc + instr.brx.GetBranchExtend();
|
||||||
const Node op_a = GetRegister(instr.gpr8);
|
const Node op_a = GetRegister(instr.gpr8);
|
||||||
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight,
|
const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
|
||||||
true, PRECISE, op_a, Immediate(3));
|
PRECISE, op_a, Immediate(3));
|
||||||
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
|
||||||
}
|
}
|
||||||
const Node branch = Operation(OperationCode::BranchIndirect, operand);
|
const Node branch = Operation(OperationCode::BranchIndirect, operand);
|
||||||
|
@ -143,6 +144,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||||
"Constant buffer flow is not supported");
|
"Constant buffer flow is not supported");
|
||||||
|
|
||||||
|
if (disable_flow_stack) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
|
// The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
|
||||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||||
bb.push_back(
|
bb.push_back(
|
||||||
|
@ -153,6 +158,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
|
||||||
"Constant buffer PBK is not supported");
|
"Constant buffer PBK is not supported");
|
||||||
|
|
||||||
|
if (disable_flow_stack) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// PBK pushes to a stack the address where BRK will jump to.
|
// PBK pushes to a stack the address where BRK will jump to.
|
||||||
const u32 target = pc + instr.bra.GetBranchTarget();
|
const u32 target = pc + instr.bra.GetBranchTarget();
|
||||||
bb.push_back(
|
bb.push_back(
|
||||||
|
@ -164,6 +173,10 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
|
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "SYNC condition code used: {}",
|
||||||
static_cast<u32>(cc));
|
static_cast<u32>(cc));
|
||||||
|
|
||||||
|
if (disable_flow_stack) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// The SYNC opcode jumps to the address previously set by the SSY opcode
|
// The SYNC opcode jumps to the address previously set by the SSY opcode
|
||||||
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
|
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
|
||||||
break;
|
break;
|
||||||
|
@ -172,6 +185,9 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
|
||||||
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
|
const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
|
||||||
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
|
UNIMPLEMENTED_IF_MSG(cc != Tegra::Shader::ConditionCode::T, "BRK condition code used: {}",
|
||||||
static_cast<u32>(cc));
|
static_cast<u32>(cc));
|
||||||
|
if (disable_flow_stack) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// The BRK opcode jumps to the address previously set by the PBK opcode
|
// The BRK opcode jumps to the address previously set by the PBK opcode
|
||||||
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
|
bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
|
||||||
|
|
|
@ -123,10 +123,15 @@ public:
|
||||||
return header;
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool IsFlowStackDisabled() const {
|
||||||
|
return disable_flow_stack;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Decode();
|
void Decode();
|
||||||
|
|
||||||
NodeBlock DecodeRange(u32 begin, u32 end);
|
NodeBlock DecodeRange(u32 begin, u32 end);
|
||||||
|
void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
|
||||||
void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
|
void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -320,6 +325,7 @@ private:
|
||||||
const ProgramCode& program_code;
|
const ProgramCode& program_code;
|
||||||
const u32 main_offset;
|
const u32 main_offset;
|
||||||
const std::size_t program_size;
|
const std::size_t program_size;
|
||||||
|
bool disable_flow_stack{};
|
||||||
|
|
||||||
u32 coverage_begin{};
|
u32 coverage_begin{};
|
||||||
u32 coverage_end{};
|
u32 coverage_end{};
|
||||||
|
|
Reference in New Issue