Merge pull request #652 from neobrain/shader_output_fix
Pica/VertexShader: Fix a bug caused by the incorrect assumption that output register tables are consecutive.
This commit is contained in:
commit
c1f5cb7dd5
|
@ -72,7 +72,7 @@ struct VertexShaderState {
|
||||||
u32* program_counter;
|
u32* program_counter;
|
||||||
|
|
||||||
const float24* input_register_table[16];
|
const float24* input_register_table[16];
|
||||||
float24* output_register_table[7*4];
|
Math::Vec4<float24> output_registers[16];
|
||||||
|
|
||||||
Math::Vec4<float24> temporary_registers[16];
|
Math::Vec4<float24> temporary_registers[16];
|
||||||
bool conditional_code[2];
|
bool conditional_code[2];
|
||||||
|
@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
src2[3] = src2[3] * float24::FromFloat32(-1);
|
src2[3] = src2[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]
|
float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
|
||||||
: (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
|
|
||||||
: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
|
: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
|
@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
src3[3] = src3[3] * float24::FromFloat32(-1);
|
src3[3] = src3[3] * float24::FromFloat32(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]
|
float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
|
|
||||||
: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
|
: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
|
||||||
: dummy_vec4_float24;
|
: dummy_vec4_float24;
|
||||||
|
|
||||||
|
@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
|
||||||
if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
|
if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
|
||||||
if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
|
if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
|
||||||
|
|
||||||
// Setup output register table
|
state.conditional_code[0] = false;
|
||||||
OutputVertex ret;
|
state.conditional_code[1] = false;
|
||||||
// Zero output so that attributes which aren't output won't have denormals in them, which will
|
|
||||||
// slow us down later.
|
|
||||||
memset(&ret, 0, sizeof(ret));
|
|
||||||
|
|
||||||
|
ProcessShaderCode(state);
|
||||||
|
DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
|
||||||
|
state.debug.max_opdesc_id, registers.vs_main_offset,
|
||||||
|
registers.vs_output_attributes);
|
||||||
|
|
||||||
|
// Setup output data
|
||||||
|
OutputVertex ret;
|
||||||
|
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
|
||||||
|
// figure out what those circumstances are and enable the remaining outputs then.
|
||||||
for (int i = 0; i < 7; ++i) {
|
for (int i = 0; i < 7; ++i) {
|
||||||
const auto& output_register_map = registers.vs_output_attributes[i];
|
const auto& output_register_map = registers.vs_output_attributes[i];
|
||||||
|
|
||||||
|
@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
|
||||||
output_register_map.map_z, output_register_map.map_w
|
output_register_map.map_z, output_register_map.map_w
|
||||||
};
|
};
|
||||||
|
|
||||||
for (int comp = 0; comp < 4; ++comp)
|
for (int comp = 0; comp < 4; ++comp) {
|
||||||
state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
|
float24* out = ((float24*)&ret) + semantics[comp];
|
||||||
|
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
|
||||||
|
*out = state.output_registers[i][comp];
|
||||||
|
} else {
|
||||||
|
// Zero output so that attributes which aren't output won't have denormals in them,
|
||||||
|
// which would slow us down later.
|
||||||
|
memset(out, 0, sizeof(*out));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
state.conditional_code[0] = false;
|
|
||||||
state.conditional_code[1] = false;
|
|
||||||
|
|
||||||
ProcessShaderCode(state);
|
|
||||||
DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
|
|
||||||
state.debug.max_opdesc_id, registers.vs_main_offset,
|
|
||||||
registers.vs_output_attributes);
|
|
||||||
|
|
||||||
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||||
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
||||||
|
|
Reference in New Issue