yuzu-emu
/
yuzu-android
Archived
1
0
Fork 0

Rasterizer: Use UBOs instead of SSBOs for uploading const buffers.

This should help a bit with GPU performance once we're GPU-bound.
This commit is contained in:
Subv 2018-06-09 18:02:05 -05:00
parent d81aaa3ed3
commit 2a7653142d
4 changed files with 39 additions and 18 deletions

View File

@@ -197,8 +197,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
// Next available bindpoints to use when uploading the const buffers and textures to the GLSL // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
// shaders. // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
u32 current_constbuffer_bindpoint = 0; u32 current_constbuffer_bindpoint = uniform_buffers.size();
u32 current_texture_bindpoint = 0; u32 current_texture_bindpoint = 0;
for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
@@ -608,27 +608,33 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); boost::optional<VAddr> addr = gpu.memory_manager->GpuToCpuAddress(buffer.address);
std::vector<u8> data; size_t size = 0;
if (used_buffer.IsIndirect()) { if (used_buffer.IsIndirect()) {
// Buffer is accessed indirectly, so upload the entire thing // Buffer is accessed indirectly, so upload the entire thing
data.resize(buffer.size * sizeof(float)); size = buffer.size * sizeof(float);
} else { } else {
// Buffer is accessed directly, upload just what we use // Buffer is accessed directly, upload just what we use
data.resize(used_buffer.GetSize() * sizeof(float)); size = used_buffer.GetSize() * sizeof(float);
} }
// Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
// UBO alignment requirements.
size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
std::vector<u8> data(size);
Memory::ReadBlock(*addr, data.data(), data.size()); Memory::ReadBlock(*addr, data.data(), data.size());
glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo);
glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); glBindBuffer(GL_UNIFORM_BUFFER, 0);
// Now configure the bindpoint of the buffer inside the shader // Now configure the bindpoint of the buffer inside the shader
std::string buffer_name = used_buffer.GetName(); std::string buffer_name = used_buffer.GetName();
GLuint index = GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str());
glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str());
if (index != -1) if (index != -1)
glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); glUniformBlockBinding(program, index, buffer_draw_state.bindpoint);
} }
state.Apply(); state.Apply();

View File

@@ -54,6 +54,11 @@ public:
OGLShader shader; OGLShader shader;
}; };
/// Maximum supported size that a constbuffer can have in bytes.
static constexpr size_t MaxConstbufferSize = 0x1000;
static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
"The maximum size of a constbuffer must be a multiple of the size of GLvec4");
private: private:
class SamplerInfo { class SamplerInfo {
public: public:

View File

@@ -9,6 +9,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_bytecode.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
namespace GLShader { namespace GLShader {
@@ -366,7 +367,8 @@ public:
/// Generates code representing a uniform (C buffer) register, interpreted as the input type. /// Generates code representing a uniform (C buffer) register, interpreted as the input type.
std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) {
declr_const_buffers[index].MarkAsUsed(index, offset, stage); declr_const_buffers[index].MarkAsUsed(index, offset, stage);
std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" +
std::to_string(offset % 4) + ']';
if (type == GLSLRegister::Type::Float) { if (type == GLSLRegister::Type::Float) {
return value; return value;
@@ -380,8 +382,12 @@ public:
std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg,
GLSLRegister::Type type) { GLSLRegister::Type type) {
declr_const_buffers[index].MarkAsUsedIndirect(index, stage); declr_const_buffers[index].MarkAsUsedIndirect(index, stage);
std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" +
GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " +
std::to_string(offset) + ") / 4)";
std::string value =
'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]";
if (type == GLSLRegister::Type::Float) { if (type == GLSLRegister::Type::Float) {
return value; return value;
@@ -423,9 +429,10 @@ public:
unsigned const_buffer_layout = 0; unsigned const_buffer_layout = 0;
for (const auto& entry : GetConstBuffersDeclarations()) { for (const auto& entry : GetConstBuffersDeclarations()) {
declarations.AddLine("layout(std430) buffer " + entry.GetName()); declarations.AddLine("layout(std140) uniform " + entry.GetName());
declarations.AddLine('{'); declarations.AddLine('{');
declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) +
"[MAX_CONSTBUFFER_ELEMENTS];");
declarations.AddLine("};"); declarations.AddLine("};");
declarations.AddNewLine(); declarations.AddNewLine();
++const_buffer_layout; ++const_buffer_layout;
@@ -1611,7 +1618,10 @@ private:
}; // namespace Decompiler }; // namespace Decompiler
std::string GetCommonDeclarations() { std::string GetCommonDeclarations() {
return "bool exec_shader();"; std::string declarations = "bool exec_shader();\n";
declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
return declarations;
} }
boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset, boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,

View File

@@ -223,7 +223,7 @@ void OpenGLState::Apply() const {
if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint ||
current.ssbo != new_state.ssbo) { current.ssbo != new_state.ssbo) {
if (new_state.enabled) { if (new_state.enabled) {
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo); glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo);
} }
} }
} }