yuzu-emu
/
yuzu-mainline
Archived
1
0
Fork 0

VideoCore/Shader: Split interpreter and JIT into separate ShaderEngines

This commit is contained in:
Yuri Kunde Schlesner 2016-12-17 01:21:16 -08:00
parent 8eefc62833
commit 114d6b2f97
8 changed files with 160 additions and 104 deletions

View File

@ -50,9 +50,11 @@ set(HEADERS
if(ARCHITECTURE_x86_64) if(ARCHITECTURE_x86_64)
set(SRCS ${SRCS} set(SRCS ${SRCS}
shader/shader_jit_x64.cpp
shader/shader_jit_x64_compiler.cpp) shader/shader_jit_x64_compiler.cpp)
set(HEADERS ${HEADERS} set(HEADERS ${HEADERS}
shader/shader_jit_x64.h
shader/shader_jit_x64_compiler.h) shader/shader_jit_x64_compiler.h)
endif() endif()

View File

@ -499,7 +499,7 @@ void Init() {
} }
void Shutdown() { void Shutdown() {
Shader::ClearCache(); Shader::Shutdown();
} }
template <typename T> template <typename T>

View File

@ -2,14 +2,8 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <atomic>
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <unordered_map>
#include <utility>
#include <boost/range/algorithm/fill.hpp>
#include "common/bit_field.h"
#include "common/hash.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "video_core/pica.h" #include "video_core/pica.h"
@ -17,7 +11,7 @@
#include "video_core/shader/shader.h" #include "video_core/shader/shader.h"
#include "video_core/shader/shader_interpreter.h" #include "video_core/shader/shader_interpreter.h"
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
#include "video_core/shader/shader_jit_x64_compiler.h" #include "video_core/shader/shader_jit_x64.h"
#endif // ARCHITECTURE_x86_64 #endif // ARCHITECTURE_x86_64
#include "video_core/video_core.h" #include "video_core/video_core.h"
@ -87,91 +81,31 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) {
conditional_code[1] = false; conditional_code[1] = false;
} }
/// ShaderEngine implementation that combines both execution paths: SetupBatch() stores the batch
/// setup and, on x86_64 builds with VideoCore::g_shader_jit_enabled set, selects a JitShader for it.
class MergedShaderEngine : public ShaderEngine {
public:
/// Stores `setup` for subsequent calls; a null pointer is accepted and only recorded.
void SetupBatch(const ShaderSetup* setup) override;
/// Runs the shader on `state`, passing `entry_point` through to the JIT shader or the interpreter.
void Run(UnitState& state, unsigned int entry_point) const override;
/// Runs the shader through the interpreter on a fresh UnitState and returns the collected
/// DebugData<true>.
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
unsigned int entry_point) const override;
private:
// Non-owning pointer to the setup handed to the most recent SetupBatch() call.
const ShaderSetup* setup = nullptr;
};
#ifdef ARCHITECTURE_x86_64
// Owns every compiled JitShader, keyed by the XOR of the ComputeHash64 values of the setup's
// program code and swizzle data (see MergedShaderEngine::SetupBatch).
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
// Non-owning pointer to the shader chosen by the last SetupBatch() call made with the JIT enabled;
// it points into shader_map.
static const JitShader* jit_shader;
#endif // ARCHITECTURE_x86_64
/// Clears the JIT shader cache. Does nothing on builds without ARCHITECTURE_x86_64.
void ClearCache() {
#ifdef ARCHITECTURE_x86_64
    // jit_shader is a non-owning pointer to an object owned by shader_map. Reset it before the
    // owning entries are destroyed so it can never be left dangling.
    jit_shader = nullptr;
    shader_map.clear();
#endif // ARCHITECTURE_x86_64
}
/// Records the setup for the upcoming batch and, when the JIT is enabled on x86_64 builds, selects
/// (compiling on a cache miss) the JitShader that Run() will execute.
void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) {
    setup = setup_;
    if (!setup) {
        return;
    }

#ifdef ARCHITECTURE_x86_64
    if (!VideoCore::g_shader_jit_enabled) {
        return;
    }

    // The cache key combines the hashes of the program code and the swizzle data.
    const u64 program_hash =
        Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
    const u64 swizzle_data_hash =
        Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
    const u64 key = program_hash ^ swizzle_data_hash;

    const auto found = shader_map.find(key);
    if (found == shader_map.end()) {
        // Cache miss: compile a new shader and hand ownership to the map.
        auto compiled = std::make_unique<JitShader>();
        compiled->Compile();
        jit_shader = compiled.get();
        shader_map[key] = std::move(compiled);
    } else {
        jit_shader = found->second.get();
    }
#endif // ARCHITECTURE_x86_64
}
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const {
ASSERT(setup != nullptr);
ASSERT(entry_point < 1024);
MICROPROFILE_SCOPE(GPU_Shader);
#ifdef ARCHITECTURE_x86_64 #ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled) { static std::unique_ptr<JitX64Engine> jit_engine;
jit_shader->Run(*setup, state, entry_point);
} else {
DebugData<false> dummy_debug_data;
RunInterpreter(*setup, state, dummy_debug_data, entry_point);
}
#else
DebugData<false> dummy_debug_data;
RunInterpreter(*setup, state, dummy_debug_data, entry_point);
#endif // ARCHITECTURE_x86_64 #endif // ARCHITECTURE_x86_64
} static InterpreterEngine interpreter_engine;
/// Runs the current setup through the interpreter on a scratch UnitState loaded with `input` and
/// returns the debug data gathered during that run.
DebugData<true> MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
                                                     unsigned int entry_point) const {
    ASSERT(setup != nullptr);
    ASSERT(entry_point < 1024);

    UnitState state;
    DebugData<true> debug_data;

    // Zero every input register before the vertex attributes are loaded.
    const auto zero_register = Math::Vec4<float24>::AssignToAll(float24::Zero());
    boost::fill(state.registers.input, zero_register);
    state.LoadInputVertex(input, num_attributes);

    RunInterpreter(*setup, state, debug_data, entry_point);
    return debug_data;
}
ShaderEngine* GetEngine() { ShaderEngine* GetEngine() {
static MergedShaderEngine merged_engine; #ifdef ARCHITECTURE_x86_64
return &merged_engine; // TODO(yuriks): Re-initialize on each change rather than being persistent
if (VideoCore::g_shader_jit_enabled) {
if (jit_engine == nullptr) {
jit_engine = std::make_unique<JitX64Engine>();
}
return jit_engine.get();
}
#endif // ARCHITECTURE_x86_64
return &interpreter_engine;
}
void Shutdown() {
#ifdef ARCHITECTURE_x86_64
jit_engine = nullptr;
#endif // ARCHITECTURE_x86_64
} }
} // namespace Shader } // namespace Shader

View File

@ -6,7 +6,6 @@
#include <array> #include <array>
#include <cstddef> #include <cstddef>
#include <memory>
#include <type_traits> #include <type_traits>
#include <nihstro/shader_bytecode.h> #include <nihstro/shader_bytecode.h>
#include "common/assert.h" #include "common/assert.h"
@ -152,9 +151,6 @@ struct UnitState {
void LoadInputVertex(const InputVertex& input, int num_attributes); void LoadInputVertex(const InputVertex& input, int num_attributes);
}; };
/// Clears the shader cache
void ClearCache();
struct ShaderSetup { struct ShaderSetup {
struct { struct {
// The float uniforms are accessed by the shader JIT using SSE instructions, and are // The float uniforms are accessed by the shader JIT using SSE instructions, and are
@ -210,6 +206,7 @@ public:
// TODO(yuriks): Remove and make it non-global state somewhere // TODO(yuriks): Remove and make it non-global state somewhere
ShaderEngine* GetEngine(); ShaderEngine* GetEngine();
void Shutdown();
} // namespace Shader } // namespace Shader

View File

@ -7,10 +7,12 @@
#include <cmath> #include <cmath>
#include <numeric> #include <numeric>
#include <boost/container/static_vector.hpp> #include <boost/container/static_vector.hpp>
#include <boost/range/algorithm/fill.hpp>
#include <nihstro/shader_bytecode.h> #include <nihstro/shader_bytecode.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h"
#include "common/vector_math.h" #include "common/vector_math.h"
#include "video_core/pica_state.h" #include "video_core/pica_state.h"
#include "video_core/pica_types.h" #include "video_core/pica_types.h"
@ -37,7 +39,7 @@ struct CallStackElement {
}; };
template <bool Debug> template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data, static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
unsigned offset) { unsigned offset) {
// TODO: Is there a maximal size for this? // TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack; boost::container::static_vector<CallStackElement, 16> call_stack;
@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
} }
} }
// Explicit instantiation void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) {
template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset); setup = setup_;
template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset); }
MICROPROFILE_DECLARE(GPU_Shader);

/// Interprets the shader from the setup given to SetupBatch() on `state`, discarding debug output.
void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const {
    ASSERT(setup != nullptr);
    ASSERT(entry_point < 1024);

    MICROPROFILE_SCOPE(GPU_Shader);

    // RunInterpreter always takes a DebugData argument; this one is never read.
    DebugData<false> unused_debug_data;
    RunInterpreter(*setup, state, unused_debug_data, entry_point);
}
/// Interprets the shader on a scratch UnitState loaded with `input` and returns the debug data
/// collected during that run.
DebugData<true> InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
                                                    unsigned int entry_point) const {
    ASSERT(setup != nullptr);
    ASSERT(entry_point < 1024);

    UnitState state;
    DebugData<true> debug_data;

    // Clear the input register table, then load the vertex attributes on top of it.
    const auto zero_register = Math::Vec4<float24>::AssignToAll(float24::Zero());
    boost::fill(state.registers.input, zero_register);
    state.LoadInputVertex(input, num_attributes);

    RunInterpreter(*setup, state, debug_data, entry_point);
    return debug_data;
}
} // namespace } // namespace

View File

@ -4,19 +4,22 @@
#pragma once #pragma once
#include "video_core/shader/shader.h"
namespace Pica { namespace Pica {
namespace Shader { namespace Shader {
struct ShaderSetup; class InterpreterEngine final : public ShaderEngine {
struct UnitState; public:
void SetupBatch(const ShaderSetup* setup) override;
void Run(UnitState& state, unsigned int entry_point) const override;
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
unsigned int entry_point) const override;
template <bool Debug> private:
struct DebugData; const ShaderSetup* setup = nullptr;
};
template <bool Debug>
void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
unsigned offset);
} // namespace } // namespace

View File

@ -0,0 +1,56 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/hash.h"
#include "common/microprofile.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_jit_x64.h"
#include "video_core/shader/shader_jit_x64_compiler.h"
namespace Pica {
namespace Shader {
// Both special members are defaulted here rather than in the header. This file includes
// shader_jit_x64_compiler.h, which presumably provides the full JitShader definition needed to
// construct and destroy the std::unique_ptr<JitShader> cache entries - TODO confirm.
JitX64Engine::JitX64Engine() = default;
JitX64Engine::~JitX64Engine() = default;
void JitX64Engine::SetupBatch(const ShaderSetup* setup_) {
cached_shader = nullptr;
setup = setup_;
if (setup == nullptr)
return;
u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
u64 cache_key = code_hash ^ swizzle_hash;
auto iter = cache.find(cache_key);
if (iter != cache.end()) {
cached_shader = iter->second.get();
} else {
auto shader = std::make_unique<JitShader>();
shader->Compile();
cached_shader = shader.get();
cache.emplace_hint(iter, cache_key, std::move(shader));
}
}
MICROPROFILE_DECLARE(GPU_Shader);

/// Executes the shader selected by the last SetupBatch() call on `state`, passing `entry_point`
/// through to JitShader::Run.
void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const {
    ASSERT(setup != nullptr);
    ASSERT(cached_shader != nullptr);
    ASSERT(entry_point < 1024);

    MICROPROFILE_SCOPE(GPU_Shader);

    const ShaderSetup& batch_setup = *setup;
    cached_shader->Run(batch_setup, state, entry_point);
}
/// Debug-tracing entry point required by the ShaderEngine interface.
///
/// The JIT cannot trace shaders, so this always reports the call through UNIMPLEMENTED_MSG. An
/// empty DebugData is returned afterwards so that control never flows off the end of a
/// value-returning function (undefined behaviour) in case the assertion macro returns.
DebugData<true> JitX64Engine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
                                               unsigned int entry_point) const {
    UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT.");
    return DebugData<true>{};
}
} // namespace Shader
} // namespace Pica

View File

@ -0,0 +1,35 @@
// Copyright 2016 Citra Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
namespace Pica {
namespace Shader {
class JitShader;
/// ShaderEngine implementation that executes shaders through JitShader objects and keeps the
/// compiled shaders in a cache owned by the engine instance.
class JitX64Engine final : public ShaderEngine {
public:
// Constructor and destructor are only declared here and defined out of line; JitShader is merely
// forward-declared in this header.
JitX64Engine();
~JitX64Engine() override;
/// Records `setup` for the batch and selects (compiling on a cache miss) the shader to run.
void SetupBatch(const ShaderSetup* setup) override;
/// Runs the shader selected by the last SetupBatch() call on `state`.
void Run(UnitState& state, unsigned int entry_point) const override;
/// Not supported by the JIT engine; the implementation reports this via UNIMPLEMENTED_MSG.
DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
unsigned int entry_point) const override;
private:
// Non-owning pointer to the setup passed to the most recent SetupBatch() call.
const ShaderSetup* setup = nullptr;
// Owns the compiled shaders, keyed by the XOR of the ComputeHash64 values of the setup's program
// code and swizzle data.
std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
// Non-owning pointer into `cache`: the shader selected for the current batch, or nullptr.
const JitShader* cached_shader = nullptr;
};
} // namespace Shader
} // namespace Pica