Merge pull request #3662 from wwylele/shader-hash-cache
shader: avoid recomputing hash for the same program
This commit is contained in:
commit
048b0fc0d3
|
@ -451,6 +451,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
|
LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
|
||||||
} else {
|
} else {
|
||||||
g_state.gs.program_code[offset] = value;
|
g_state.gs.program_code[offset] = value;
|
||||||
|
g_state.gs.MarkProgramCodeDirty();
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -469,6 +470,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
|
LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
|
||||||
} else {
|
} else {
|
||||||
g_state.gs.swizzle_data[offset] = value;
|
g_state.gs.swizzle_data[offset] = value;
|
||||||
|
g_state.gs.MarkSwizzleDataDirty();
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -518,8 +520,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
|
LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
|
||||||
} else {
|
} else {
|
||||||
g_state.vs.program_code[offset] = value;
|
g_state.vs.program_code[offset] = value;
|
||||||
|
g_state.vs.MarkProgramCodeDirty();
|
||||||
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
||||||
g_state.gs.program_code[offset] = value;
|
g_state.gs.program_code[offset] = value;
|
||||||
|
g_state.gs.MarkProgramCodeDirty();
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
|
@ -539,8 +543,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||||
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
|
LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
|
||||||
} else {
|
} else {
|
||||||
g_state.vs.swizzle_data[offset] = value;
|
g_state.vs.swizzle_data[offset] = value;
|
||||||
|
g_state.vs.MarkSwizzleDataDirty();
|
||||||
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
|
||||||
g_state.gs.swizzle_data[offset] = value;
|
g_state.gs.swizzle_data[offset] = value;
|
||||||
|
g_state.gs.MarkSwizzleDataDirty();
|
||||||
}
|
}
|
||||||
offset++;
|
offset++;
|
||||||
}
|
}
|
||||||
|
|
|
@ -12,6 +12,7 @@
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_funcs.h"
|
#include "common/common_funcs.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/hash.h"
|
||||||
#include "common/vector_math.h"
|
#include "common/vector_math.h"
|
||||||
#include "video_core/pica_types.h"
|
#include "video_core/pica_types.h"
|
||||||
#include "video_core/regs_rasterizer.h"
|
#include "video_core/regs_rasterizer.h"
|
||||||
|
@ -173,27 +174,29 @@ struct GSUnitState : public UnitState {
|
||||||
GSEmitter emitter;
|
GSEmitter emitter;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ShaderSetup {
|
struct Uniforms {
|
||||||
struct {
|
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
||||||
// The float uniforms are accessed by the shader JIT using SSE instructions, and are
|
// therefore required to be 16-byte aligned.
|
||||||
// therefore required to be 16-byte aligned.
|
alignas(16) Math::Vec4<float24> f[96];
|
||||||
alignas(16) Math::Vec4<float24> f[96];
|
|
||||||
|
|
||||||
std::array<bool, 16> b;
|
std::array<bool, 16> b;
|
||||||
std::array<Math::Vec4<u8>, 4> i;
|
std::array<Math::Vec4<u8>, 4> i;
|
||||||
} uniforms;
|
|
||||||
|
|
||||||
static size_t GetFloatUniformOffset(unsigned index) {
|
static size_t GetFloatUniformOffset(unsigned index) {
|
||||||
return offsetof(ShaderSetup, uniforms.f) + index * sizeof(Math::Vec4<float24>);
|
return offsetof(Uniforms, f) + index * sizeof(Math::Vec4<float24>);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t GetBoolUniformOffset(unsigned index) {
|
static size_t GetBoolUniformOffset(unsigned index) {
|
||||||
return offsetof(ShaderSetup, uniforms.b) + index * sizeof(bool);
|
return offsetof(Uniforms, b) + index * sizeof(bool);
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t GetIntUniformOffset(unsigned index) {
|
static size_t GetIntUniformOffset(unsigned index) {
|
||||||
return offsetof(ShaderSetup, uniforms.i) + index * sizeof(Math::Vec4<u8>);
|
return offsetof(Uniforms, i) + index * sizeof(Math::Vec4<u8>);
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ShaderSetup {
|
||||||
|
Uniforms uniforms;
|
||||||
|
|
||||||
std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
|
std::array<u32, MAX_PROGRAM_CODE_LENGTH> program_code;
|
||||||
std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
|
std::array<u32, MAX_SWIZZLE_DATA_LENGTH> swizzle_data;
|
||||||
|
@ -204,6 +207,36 @@ struct ShaderSetup {
|
||||||
/// Used by the JIT, points to a compiled shader object.
|
/// Used by the JIT, points to a compiled shader object.
|
||||||
const void* cached_shader = nullptr;
|
const void* cached_shader = nullptr;
|
||||||
} engine_data;
|
} engine_data;
|
||||||
|
|
||||||
|
void MarkProgramCodeDirty() {
|
||||||
|
program_code_hash_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void MarkSwizzleDataDirty() {
|
||||||
|
swizzle_data_hash_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetProgramCodeHash() {
|
||||||
|
if (program_code_hash_dirty) {
|
||||||
|
program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
|
||||||
|
program_code_hash_dirty = false;
|
||||||
|
}
|
||||||
|
return program_code_hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 GetSwizzleDataHash() {
|
||||||
|
if (swizzle_data_hash_dirty) {
|
||||||
|
swizzle_data_hash = Common::ComputeHash64(&swizzle_data, sizeof(swizzle_data));
|
||||||
|
swizzle_data_hash_dirty = false;
|
||||||
|
}
|
||||||
|
return swizzle_data_hash;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool program_code_hash_dirty = true;
|
||||||
|
bool swizzle_data_hash_dirty = true;
|
||||||
|
u64 program_code_hash = 0xDEADC0DE;
|
||||||
|
u64 swizzle_data_hash = 0xDEADC0DE;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ShaderEngine {
|
class ShaderEngine {
|
||||||
|
|
|
@ -2,7 +2,6 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
#include "common/hash.h"
|
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/shader/shader.h"
|
#include "video_core/shader/shader.h"
|
||||||
#include "video_core/shader/shader_jit_x64.h"
|
#include "video_core/shader/shader_jit_x64.h"
|
||||||
|
@ -18,8 +17,8 @@ void JitX64Engine::SetupBatch(ShaderSetup& setup, unsigned int entry_point) {
|
||||||
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
ASSERT(entry_point < MAX_PROGRAM_CODE_LENGTH);
|
||||||
setup.engine_data.entry_point = entry_point;
|
setup.engine_data.entry_point = entry_point;
|
||||||
|
|
||||||
u64 code_hash = Common::ComputeHash64(&setup.program_code, sizeof(setup.program_code));
|
u64 code_hash = setup.GetProgramCodeHash();
|
||||||
u64 swizzle_hash = Common::ComputeHash64(&setup.swizzle_data, sizeof(setup.swizzle_data));
|
u64 swizzle_hash = setup.GetSwizzleDataHash();
|
||||||
|
|
||||||
u64 cache_key = code_hash ^ swizzle_hash;
|
u64 cache_key = code_hash ^ swizzle_hash;
|
||||||
auto iter = cache.find(cache_key);
|
auto iter = cache.find(cache_key);
|
||||||
|
|
|
@ -104,7 +104,7 @@ const JitFunction instr_table[64] = {
|
||||||
// purposes, as documented below:
|
// purposes, as documented below:
|
||||||
|
|
||||||
/// Pointer to the uniform memory
|
/// Pointer to the uniform memory
|
||||||
static const Reg64 SETUP = r9;
|
static const Reg64 UNIFORMS = r9;
|
||||||
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
/// The two 32-bit VS address offset registers set by the MOVA instruction
|
||||||
static const Reg64 ADDROFFS_REG_0 = r10;
|
static const Reg64 ADDROFFS_REG_0 = r10;
|
||||||
static const Reg64 ADDROFFS_REG_1 = r11;
|
static const Reg64 ADDROFFS_REG_1 = r11;
|
||||||
|
@ -139,7 +139,7 @@ static const Xmm NEGBIT = xmm15;
|
||||||
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
|
||||||
static const BitSet32 persistent_regs = BuildRegSet({
|
static const BitSet32 persistent_regs = BuildRegSet({
|
||||||
// Pointers to register blocks
|
// Pointers to register blocks
|
||||||
SETUP,
|
UNIFORMS,
|
||||||
STATE,
|
STATE,
|
||||||
// Cached registers
|
// Cached registers
|
||||||
ADDROFFS_REG_0,
|
ADDROFFS_REG_0,
|
||||||
|
@ -184,8 +184,8 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
|
||||||
size_t src_offset;
|
size_t src_offset;
|
||||||
|
|
||||||
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
|
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
|
||||||
src_ptr = SETUP;
|
src_ptr = UNIFORMS;
|
||||||
src_offset = ShaderSetup::GetFloatUniformOffset(src_reg.GetIndex());
|
src_offset = Uniforms::GetFloatUniformOffset(src_reg.GetIndex());
|
||||||
} else {
|
} else {
|
||||||
src_ptr = STATE;
|
src_ptr = STATE;
|
||||||
src_offset = UnitState::InputOffset(src_reg);
|
src_offset = UnitState::InputOffset(src_reg);
|
||||||
|
@ -354,8 +354,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void JitShader::Compile_UniformCondition(Instruction instr) {
|
void JitShader::Compile_UniformCondition(Instruction instr) {
|
||||||
size_t offset = ShaderSetup::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
|
size_t offset = Uniforms::GetBoolUniformOffset(instr.flow_control.bool_uniform_id);
|
||||||
cmp(byte[SETUP + offset], 0);
|
cmp(byte[UNIFORMS + offset], 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
BitSet32 JitShader::PersistentCallerSavedRegs() {
|
||||||
|
@ -713,8 +713,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
|
||||||
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
|
// This decodes the fields from the integer uniform at index instr.flow_control.int_uniform_id.
|
||||||
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
|
// The Y (LOOPCOUNT_REG) and Z (LOOPINC) component are kept multiplied by 16 (Left shifted by
|
||||||
// 4 bits) to be used as an offset into the 16-byte vector registers later
|
// 4 bits) to be used as an offset into the 16-byte vector registers later
|
||||||
size_t offset = ShaderSetup::GetIntUniformOffset(instr.flow_control.int_uniform_id);
|
size_t offset = Uniforms::GetIntUniformOffset(instr.flow_control.int_uniform_id);
|
||||||
mov(LOOPCOUNT, dword[SETUP + offset]);
|
mov(LOOPCOUNT, dword[UNIFORMS + offset]);
|
||||||
mov(LOOPCOUNT_REG, LOOPCOUNT);
|
mov(LOOPCOUNT_REG, LOOPCOUNT);
|
||||||
shr(LOOPCOUNT_REG, 4);
|
shr(LOOPCOUNT_REG, 4);
|
||||||
and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
|
and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
|
||||||
|
@ -882,7 +882,7 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
|
||||||
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
ABI_PushRegistersAndAdjustStack(*this, ABI_ALL_CALLEE_SAVED, 8, 16);
|
||||||
mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
|
mov(qword[rsp + 8], 0xFFFFFFFFFFFFFFFFULL);
|
||||||
|
|
||||||
mov(SETUP, ABI_PARAM1);
|
mov(UNIFORMS, ABI_PARAM1);
|
||||||
mov(STATE, ABI_PARAM2);
|
mov(STATE, ABI_PARAM2);
|
||||||
|
|
||||||
// Zero address/loop registers
|
// Zero address/loop registers
|
||||||
|
|
|
@ -34,7 +34,7 @@ public:
|
||||||
JitShader();
|
JitShader();
|
||||||
|
|
||||||
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
|
void Run(const ShaderSetup& setup, UnitState& state, unsigned offset) const {
|
||||||
program(&setup, &state, instruction_labels[offset].getAddress());
|
program(&setup.uniforms, &state, instruction_labels[offset].getAddress());
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,
|
void Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_code,
|
||||||
|
|
Reference in New Issue