maxwell_3d: Restructure macro upload to use a single macro code memory.
- Fixes an issue where macros could be skipped.
- Fixes rendering of distant objects in Super Mario Odyssey.
This commit is contained in:
parent
d08457f879
commit
de0ab806df
|
@ -43,15 +43,17 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
|
||||||
// Reset the current macro.
|
// Reset the current macro.
|
||||||
executing_macro = 0;
|
executing_macro = 0;
|
||||||
|
|
||||||
// The requested macro must have been uploaded already.
|
// Lookup the macro offset
|
||||||
auto macro_code = uploaded_macros.find(method);
|
const u32 entry{(method - MacroRegistersStart) >> 1};
|
||||||
if (macro_code == uploaded_macros.end()) {
|
const auto& search{macro_offsets.find(entry)};
|
||||||
LOG_ERROR(HW_GPU, "Macro {:04X} was not uploaded", method);
|
if (search == macro_offsets.end()) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "macro not found for method 0x{:X}!", method);
|
||||||
|
UNREACHABLE();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Execute the current macro.
|
// Execute the current macro.
|
||||||
macro_interpreter.Execute(macro_code->second, std::move(parameters));
|
macro_interpreter.Execute(search->second, std::move(parameters));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||||
|
@ -97,6 +99,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||||
ProcessMacroUpload(value);
|
ProcessMacroUpload(value);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MAXWELL3D_REG_INDEX(macros.bind): {
|
||||||
|
ProcessMacroBind(value);
|
||||||
|
break;
|
||||||
|
}
|
||||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
|
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]):
|
||||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
|
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[1]):
|
||||||
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
|
case MAXWELL3D_REG_INDEX(const_buffer.cb_data[2]):
|
||||||
|
@ -158,9 +164,13 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
void Maxwell3D::ProcessMacroUpload(u32 data) {
|
||||||
// Store the uploaded macro code to interpret them when they're called.
|
ASSERT_MSG(regs.macros.upload_address < macro_memory.size(),
|
||||||
auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
|
"upload_address exceeded macro_memory size!");
|
||||||
macro.push_back(data);
|
macro_memory[regs.macros.upload_address++] = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Maxwell3D::ProcessMacroBind(u32 data) {
|
||||||
|
macro_offsets[regs.macros.entry] = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessQueryGet() {
|
void Maxwell3D::ProcessQueryGet() {
|
||||||
|
|
|
@ -475,12 +475,13 @@ public:
|
||||||
INSERT_PADDING_WORDS(0x45);
|
INSERT_PADDING_WORDS(0x45);
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
INSERT_PADDING_WORDS(1);
|
u32 upload_address;
|
||||||
u32 data;
|
u32 data;
|
||||||
u32 entry;
|
u32 entry;
|
||||||
|
u32 bind;
|
||||||
} macros;
|
} macros;
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x189);
|
INSERT_PADDING_WORDS(0x188);
|
||||||
|
|
||||||
u32 tfb_enabled;
|
u32 tfb_enabled;
|
||||||
|
|
||||||
|
@ -994,12 +995,25 @@ public:
|
||||||
/// Returns the texture information for a specific texture in a specific shader stage.
|
/// Returns the texture information for a specific texture in a specific shader stage.
|
||||||
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
|
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
|
||||||
|
|
||||||
|
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
|
||||||
|
/// we've seen used.
|
||||||
|
using MacroMemory = std::array<u32, 0x40000>;
|
||||||
|
|
||||||
|
/// Gets a reference to macro memory.
|
||||||
|
const MacroMemory& GetMacroMemory() const {
|
||||||
|
return macro_memory;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void InitializeRegisterDefaults();
|
void InitializeRegisterDefaults();
|
||||||
|
|
||||||
VideoCore::RasterizerInterface& rasterizer;
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
||||||
std::unordered_map<u32, std::vector<u32>> uploaded_macros;
|
/// Start offsets of each macro in macro_memory
|
||||||
|
std::unordered_map<u32, u32> macro_offsets;
|
||||||
|
|
||||||
|
/// Memory for macro code
|
||||||
|
MacroMemory macro_memory;
|
||||||
|
|
||||||
/// Macro method that is currently being executed / being fed parameters.
|
/// Macro method that is currently being executed / being fed parameters.
|
||||||
u32 executing_macro = 0;
|
u32 executing_macro = 0;
|
||||||
|
@ -1022,9 +1036,12 @@ private:
|
||||||
*/
|
*/
|
||||||
void CallMacroMethod(u32 method, std::vector<u32> parameters);
|
void CallMacroMethod(u32 method, std::vector<u32> parameters);
|
||||||
|
|
||||||
/// Handles writes to the macro uploading registers.
|
/// Handles writes to the macro uploading register.
|
||||||
void ProcessMacroUpload(u32 data);
|
void ProcessMacroUpload(u32 data);
|
||||||
|
|
||||||
|
/// Handles writes to the macro bind register.
|
||||||
|
void ProcessMacroBind(u32 data);
|
||||||
|
|
||||||
/// Handles a write to the CLEAR_BUFFERS register.
|
/// Handles a write to the CLEAR_BUFFERS register.
|
||||||
void ProcessClearBuffers();
|
void ProcessClearBuffers();
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ namespace Tegra {
|
||||||
|
|
||||||
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
|
||||||
|
|
||||||
void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> parameters) {
|
void MacroInterpreter::Execute(u32 offset, std::vector<u32> parameters) {
|
||||||
Reset();
|
Reset();
|
||||||
registers[1] = parameters[0];
|
registers[1] = parameters[0];
|
||||||
this->parameters = std::move(parameters);
|
this->parameters = std::move(parameters);
|
||||||
|
@ -19,7 +19,7 @@ void MacroInterpreter::Execute(const std::vector<u32>& code, std::vector<u32> pa
|
||||||
// Execute the code until we hit an exit condition.
|
// Execute the code until we hit an exit condition.
|
||||||
bool keep_executing = true;
|
bool keep_executing = true;
|
||||||
while (keep_executing) {
|
while (keep_executing) {
|
||||||
keep_executing = Step(code, false);
|
keep_executing = Step(offset, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assert that the macro used all the input parameters
|
// Assert that the macro used all the input parameters
|
||||||
|
@ -37,10 +37,10 @@ void MacroInterpreter::Reset() {
|
||||||
next_parameter_index = 1;
|
next_parameter_index = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
bool MacroInterpreter::Step(u32 offset, bool is_delay_slot) {
|
||||||
u32 base_address = pc;
|
u32 base_address = pc;
|
||||||
|
|
||||||
Opcode opcode = GetOpcode(code);
|
Opcode opcode = GetOpcode(offset);
|
||||||
pc += 4;
|
pc += 4;
|
||||||
|
|
||||||
// Update the program counter if we were delayed
|
// Update the program counter if we were delayed
|
||||||
|
@ -108,7 +108,7 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
||||||
|
|
||||||
delayed_pc = base_address + opcode.GetBranchTarget();
|
delayed_pc = base_address + opcode.GetBranchTarget();
|
||||||
// Execute one more instruction due to the delay slot.
|
// Execute one more instruction due to the delay slot.
|
||||||
return Step(code, true);
|
return Step(offset, true);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -121,17 +121,18 @@ bool MacroInterpreter::Step(const std::vector<u32>& code, bool is_delay_slot) {
|
||||||
// Exit has a delay slot, execute the next instruction
|
// Exit has a delay slot, execute the next instruction
|
||||||
// Note: Executing an exit during a branch delay slot will cause the instruction at the
|
// Note: Executing an exit during a branch delay slot will cause the instruction at the
|
||||||
// branch target to be executed before exiting.
|
// branch target to be executed before exiting.
|
||||||
Step(code, true);
|
Step(offset, true);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(const std::vector<u32>& code) const {
|
MacroInterpreter::Opcode MacroInterpreter::GetOpcode(u32 offset) const {
|
||||||
|
const auto& macro_memory{maxwell3d.GetMacroMemory()};
|
||||||
ASSERT((pc % sizeof(u32)) == 0);
|
ASSERT((pc % sizeof(u32)) == 0);
|
||||||
ASSERT(pc < code.size() * sizeof(u32));
|
ASSERT((pc + offset) < macro_memory.size() * sizeof(u32));
|
||||||
return {code[pc / sizeof(u32)]};
|
return {macro_memory[offset + pc / sizeof(u32)]};
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
|
u32 MacroInterpreter::GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const {
|
||||||
|
|
|
@ -22,10 +22,10 @@ public:
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Executes the macro code with the specified input parameters.
|
* Executes the macro code with the specified input parameters.
|
||||||
* @param code The macro byte code to execute
|
* @param offset Offset to start execution at.
|
||||||
* @param parameters The parameters of the macro
|
* @param parameters The parameters of the macro.
|
||||||
*/
|
*/
|
||||||
void Execute(const std::vector<u32>& code, std::vector<u32> parameters);
|
void Execute(u32 offset, std::vector<u32> parameters);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class Operation : u32 {
|
enum class Operation : u32 {
|
||||||
|
@ -110,11 +110,11 @@ private:
|
||||||
/**
|
/**
|
||||||
* Executes a single macro instruction located at the current program counter. Returns whether
|
* Executes a single macro instruction located at the current program counter. Returns whether
|
||||||
* the interpreter should keep running.
|
* the interpreter should keep running.
|
||||||
* @param code The macro code to execute.
|
* @param offset Offset to start execution at.
|
||||||
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
* @param is_delay_slot Whether the current step is being executed due to a delay slot in a
|
||||||
* previous instruction.
|
* previous instruction.
|
||||||
*/
|
*/
|
||||||
bool Step(const std::vector<u32>& code, bool is_delay_slot);
|
bool Step(u32 offset, bool is_delay_slot);
|
||||||
|
|
||||||
/// Calculates the result of an ALU operation. src_a OP src_b;
|
/// Calculates the result of an ALU operation. src_a OP src_b;
|
||||||
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
|
u32 GetALUResult(ALUOperation operation, u32 src_a, u32 src_b) const;
|
||||||
|
@ -127,7 +127,7 @@ private:
|
||||||
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
|
bool EvaluateBranchCondition(BranchCondition cond, u32 value) const;
|
||||||
|
|
||||||
/// Reads an opcode at the current program counter location.
|
/// Reads an opcode at the current program counter location.
|
||||||
Opcode GetOpcode(const std::vector<u32>& code) const;
|
Opcode GetOpcode(u32 offset) const;
|
||||||
|
|
||||||
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
/// Returns the specified register's value. Register 0 is hardcoded to always return 0.
|
||||||
u32 GetRegister(u32 register_id) const;
|
u32 GetRegister(u32 register_id) const;
|
||||||
|
|
Reference in New Issue