From 8af6e6a05207b1c9736bd80a89ec3aed1f96dfea Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 24 Jun 2019 19:46:49 -0400 Subject: [PATCH] shader_ir: Implement a new shader scanner --- CMakeModules/GenerateSCMRev.cmake | 2 + src/common/CMakeLists.txt | 2 + src/video_core/CMakeLists.txt | 2 + src/video_core/shader/control_flow.cpp | 393 +++++++++++++++++++++++++ src/video_core/shader/control_flow.h | 55 ++++ src/video_core/shader/decode.cpp | 37 ++- 6 files changed, 476 insertions(+), 15 deletions(-) create mode 100644 src/video_core/shader/control_flow.cpp create mode 100644 src/video_core/shader/control_flow.h diff --git a/CMakeModules/GenerateSCMRev.cmake b/CMakeModules/GenerateSCMRev.cmake index dd65cfe42..abdc74428 100644 --- a/CMakeModules/GenerateSCMRev.cmake +++ b/CMakeModules/GenerateSCMRev.cmake @@ -82,6 +82,8 @@ set(HASH_FILES "${VIDEO_CORE}/shader/decode/shift.cpp" "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 2554add28..2b4266f29 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -56,6 +56,8 @@ add_custom_command(OUTPUT scm_rev.cpp "${VIDEO_CORE}/shader/decode/shift.cpp" "${VIDEO_CORE}/shader/decode/video.cpp" "${VIDEO_CORE}/shader/decode/xmad.cpp" + "${VIDEO_CORE}/shader/control_flow.cpp" + "${VIDEO_CORE}/shader/control_flow.h" "${VIDEO_CORE}/shader/decode.cpp" "${VIDEO_CORE}/shader/node.h" "${VIDEO_CORE}/shader/node_helper.cpp" diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..cd32c65d3 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -103,6 +103,8 @@ add_library(video_core STATIC shader/decode/video.cpp shader/decode/xmad.cpp shader/decode/other.cpp + shader/control_flow.cpp + shader/control_flow.h shader/decode.cpp shader/node_helper.cpp shader/node_helper.h diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp new file mode 100644 index 000000000..fcf22c7f2 --- /dev/null +++ b/src/video_core/shader/control_flow.cpp @@ -0,0 +1,393 @@ + +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/common_types.h" +#include "video_core/shader/control_flow.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::Instruction; +using Tegra::Shader::OpCode; + +constexpr s32 unassigned_branch = -2; + +struct BlockBranchInfo { + Condition condition{}; + s32 address{exit_branch}; + bool kill{}; + bool is_sync{}; + bool is_brk{}; +}; + +struct BlockInfo { + BlockInfo() {} + u32 start{}; + u32 end{}; + bool visited{}; + BlockBranchInfo branch{}; + + bool IsInside(const u32 address) const { + return start <= address && address <= end; + } +}; + +struct Stamp { + Stamp() = default; + Stamp(u32 address, u32 target) : address{address}, target{target} {} + u32 address{}; + u32 target{}; + bool operator==(const Stamp& sb) const { + return std::tie(address, target) == std::tie(sb.address, sb.target); + } + bool operator<(const Stamp& sb) const { + return address < sb.address; + } + bool operator>(const Stamp& sb) const { + return address > sb.address; + } + bool operator<=(const Stamp& sb) const { + return address <= sb.address; + } + bool operator>=(const Stamp& sb) const { + return address >= sb.address; + } +}; + +struct CFGRebuildState { + explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size) + : program_code{program_code}, program_size{program_size} { + // queries.clear(); + block_info.clear(); + labels.clear(); + visited_address.clear(); + ssy_labels.clear(); + pbk_labels.clear(); + inspect_queries.clear(); + } + + std::vector block_info{}; + std::list inspect_queries{}; + // std::list queries{}; + std::unordered_set visited_address{}; + std::unordered_set labels{}; + std::set ssy_labels; + std::set pbk_labels; + const ProgramCode& program_code; + const std::size_t program_size; +}; + +enum class BlockCollision : u32 { None = 0, Found = 1, Inside = 2 }; + +std::pair::iterator> TryGetBlock(CFGRebuildState& state, + u32 address) { + auto it = state.block_info.begin(); + while (it != state.block_info.end()) { + if (it->start == address) { + return {BlockCollision::Found, it}; + } + if (it->IsInside(address)) { + return {BlockCollision::Inside, it}; + } + it++; + } + return {BlockCollision::None, it}; +} + +struct ParseInfo { + BlockBranchInfo branch_info{}; + u32 end_address{}; +}; + +BlockInfo* CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) { + auto& it = state.block_info.emplace_back(); + it.start = start; + it.end = end; + state.visited_address.insert(start); + return ⁢ +} + +Pred GetPredicate(u32 index, bool negated) { + return static_cast(index + (negated ? 8 : 0)); +} + +enum class ParseResult : u32 { + ControlCaught = 0, + BlockEnd = 1, + AbnormalFlow = 2, +}; + +ParseResult ParseCode(CFGRebuildState& state, u32 address, ParseInfo& parse_info) { + + u32 offset = static_cast(address); + u32 end_address = static_cast(state.program_size - 10U) * 8U; + + auto insert_label = ([](CFGRebuildState& state, u32 address) { + auto pair = state.labels.emplace(address); + if (pair.second) { + state.inspect_queries.push_back(address); + } + }); + + while (true) { + if (offset >= end_address) { + parse_info.branch_info.address = exit_branch; + break; + } + if (state.visited_address.count(offset) != 0) { + parse_info.branch_info.address = offset; + break; + } + const Instruction instr = {state.program_code[offset]}; + const auto opcode = OpCode::Decode(instr); + if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) { + offset++; + continue; + } + + switch (opcode->get().GetId()) { + case OpCode::Id::EXIT: { + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = exit_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::BRA: { + if (instr.bra.constant_buffer != 0) { + return ParseResult::AbnormalFlow; + } + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + u32 branch_offset = offset + instr.bra.GetBranchTarget(); + if (branch_offset == 0) { + parse_info.branch_info.address = exit_branch; + } else { + parse_info.branch_info.address = branch_offset; + } + insert_label(state, branch_offset); + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::SYNC: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = unassigned_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = true; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::BRK: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = unassigned_branch; + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = true; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::KIL: { + parse_info.branch_info.condition; + const auto pred_index = static_cast(instr.pred.pred_index); + parse_info.branch_info.condition.predicate = + GetPredicate(pred_index, instr.negate_pred != 0); + if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) { + offset++; + continue; + } + const ConditionCode cc = instr.flow_condition_code; + parse_info.branch_info.condition.cc = cc; + if (cc == ConditionCode::F) { + offset++; + continue; + } + parse_info.branch_info.address = exit_branch; + parse_info.branch_info.kill = true; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset; + + return ParseResult::ControlCaught; + } + case OpCode::Id::SSY: { + const u32 target = offset + instr.bra.GetBranchTarget(); + insert_label(state, target); + state.ssy_labels.emplace(offset, target); + break; + } + case OpCode::Id::PBK: { + const u32 target = offset + instr.bra.GetBranchTarget(); + insert_label(state, target); + state.pbk_labels.emplace(offset, target); + break; + } + default: + break; + } + + offset++; + } + parse_info.branch_info.kill = false; + parse_info.branch_info.is_sync = false; + parse_info.branch_info.is_brk = false; + parse_info.end_address = offset - 1; + return ParseResult::BlockEnd; +} + +bool TryInspectAddress(CFGRebuildState& state) { + if (state.inspect_queries.empty()) { + return false; + } + u32 address = state.inspect_queries.front(); + state.inspect_queries.pop_front(); + auto search_result = TryGetBlock(state, address); + BlockInfo* block_info; + switch (search_result.first) { + case BlockCollision::Found: { + return true; + break; + } + case BlockCollision::Inside: { + // This case is the tricky one: + // We need to Split the block in 2 sepprate blocks + auto it = search_result.second; + block_info = CreateBlockInfo(state, address, it->end); + it->end = address - 1; + block_info->branch = it->branch; + BlockBranchInfo forward_branch{}; + forward_branch.address = address; + it->branch = forward_branch; + return true; + break; + } + default: + break; + } + ParseInfo parse_info; + ParseResult parse_result = ParseCode(state, address, parse_info); + if (parse_result == ParseResult::AbnormalFlow) { + // if it's the end of the program, end it safely + // if it's AbnormalFlow, we end it as false, ending the CFG reconstruction + return false; + } + + block_info = CreateBlockInfo(state, address, parse_info.end_address); + block_info->branch = parse_info.branch_info; + if (parse_info.branch_info.condition.IsUnconditional()) { + return true; + } + + u32 fallthrough_address = parse_info.end_address + 1; + state.inspect_queries.push_front(fallthrough_address); + return true; +} + +bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, + ShaderCharacteristics& result_out) { + CFGRebuildState state{program_code, program_size}; + // Inspect Code and generate blocks + state.labels.clear(); + state.labels.emplace(start_address); + state.inspect_queries.push_back(start_address); + while (!state.inspect_queries.empty()) { + if (!TryInspectAddress(state)) { + return false; + } + } + std::sort(state.block_info.begin(), state.block_info.end(), + [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; }); + // Remove unvisited blocks + result_out.blocks.clear(); + result_out.decompilable = false; + result_out.start = start_address; + result_out.end = start_address; + for (auto& block : state.block_info) { + ShaderBlock new_block{}; + new_block.start = block.start; + new_block.end = block.end; + new_block.branch.cond = block.branch.condition; + new_block.branch.kills = block.branch.kill; + new_block.branch.address = block.branch.address; + result_out.end = std::max(result_out.end, block.end); + result_out.blocks.push_back(new_block); + } + if (result_out.decompilable) { + return true; + } + auto back = result_out.blocks.begin(); + auto next = std::next(back); + while (next != result_out.blocks.end()) { + if (state.labels.count(next->start) == 0 && next->start == back->end + 1) { + back->end = next->end; + next = result_out.blocks.erase(next); + continue; + } + back = next; + next++; + } + return true; +} +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h new file mode 100644 index 000000000..16736d57a --- /dev/null +++ b/src/video_core/shader/control_flow.h @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include +#include + +#include "video_core/engines/shader_bytecode.h" +#include "video_core/shader/shader_ir.h" + +namespace VideoCommon::Shader { + +using Tegra::Shader::ConditionCode; +using Tegra::Shader::Pred; + +constexpr s32 exit_branch = -1; + +struct Condition { + Pred predicate{Pred::UnusedIndex}; + ConditionCode cc{ConditionCode::T}; + + bool IsUnconditional() const { + return (predicate == Pred::UnusedIndex) && (cc == ConditionCode::T); + } +}; + +struct ShaderBlock { + ShaderBlock() {} + ShaderBlock(const ShaderBlock& sb) = default; + u32 start{}; + u32 end{}; + struct Branch { + Condition cond{}; + bool kills{}; + s32 address{}; + bool operator==(const Branch& b) const { + return std::memcmp(this, &b, sizeof(Branch)) == 0; + } + } branch; + bool operator==(const ShaderBlock& sb) const { + return std::memcmp(this, &sb, sizeof(ShaderBlock)) == 0; + } +}; + +struct ShaderCharacteristics { + std::list blocks; + bool decompilable{}; + u32 start; + u32 end; +}; + +bool ScanFlow(const ProgramCode& program_code, u32 program_size, u32 start_address, + ShaderCharacteristics& result_out); + +} // namespace VideoCommon::Shader diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp index 2c9ff28f2..7f433c56b 100644 --- a/src/video_core/shader/decode.cpp +++ b/src/video_core/shader/decode.cpp @@ -11,6 +11,7 @@ #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/shader/control_flow.h" #include "video_core/shader/node_helper.h" #include "video_core/shader/shader_ir.h" @@ -51,25 +52,31 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) { void ShaderIR::Decode() { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - std::set labels; - const ExitMethod exit_method = Scan(main_offset, MAX_PROGRAM_LENGTH, labels); - if (exit_method != ExitMethod::AlwaysEnd) { - UNREACHABLE_MSG("Program does not always end"); - } - - if (labels.empty()) { - basic_blocks.insert({main_offset, DecodeRange(main_offset, MAX_PROGRAM_LENGTH)}); + ShaderCharacteristics shader_info{}; + bool can_proceed = ScanFlow(program_code, MAX_PROGRAM_LENGTH, main_offset, shader_info); + if (can_proceed) { + coverage_begin = shader_info.start; + coverage_end = shader_info.end; + if (shader_info.decompilable) { + return; + } + // we can't decompile it, fallback to standard method + for (const auto& block : shader_info.blocks) { + basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)}); + } return; } + LOG_CRITICAL(HW_GPU, "Flow Analysis failed, falling back to brute force compiling"); - labels.insert(main_offset); - - for (const u32 label : labels) { - const auto next_it = labels.lower_bound(label + 1); - const u32 next_label = next_it == labels.end() ? MAX_PROGRAM_LENGTH : *next_it; - - basic_blocks.insert({label, DecodeRange(label, next_label)}); + // Now we need to deal with an undecompilable shader. We need to brute force + // a shader that captures every position. + coverage_begin = shader_info.start; + const u32 shader_end = static_cast(MAX_PROGRAM_LENGTH); + coverage_end = shader_end; + for (u32 label = main_offset; label < shader_end; label++) { + basic_blocks.insert({label, DecodeRange(label, label + 1)}); } + return; } ExitMethod ShaderIR::Scan(u32 begin, u32 end, std::set& labels) {