1
0
Fork 0

Merge pull request #3759 from degasus/TBO_upload

Rewrite texture_buffer_object handling.
This commit is contained in:
James Rowe 2018-06-26 09:24:55 -06:00 committed by GitHub
commit 26254072e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 244 additions and 362 deletions

View File

@ -37,7 +37,8 @@ MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100,
RasterizerOpenGL::RasterizerOpenGL()
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) {
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE),
texture_buffer(GL_TEXTURE_BUFFER, TEXTURE_BUFFER_SIZE) {
allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
GLAD_GL_ARB_framebuffer_no_attachments;
@ -66,7 +67,8 @@ RasterizerOpenGL::RasterizerOpenGL()
uniform_block_data.dirty = true;
uniform_block_data.lut_dirty.fill(true);
uniform_block_data.lighting_lut_dirty.fill(true);
uniform_block_data.lighting_lut_dirty_any = true;
uniform_block_data.fog_lut_dirty = true;
@ -122,77 +124,16 @@ RasterizerOpenGL::RasterizerOpenGL()
// Create render framebuffer
framebuffer.Create();
// Allocate and bind lighting lut textures
lighting_lut.Create();
state.lighting_lut.texture_buffer = lighting_lut.handle;
// Allocate and bind texture buffer lut textures
texture_buffer_lut_rg.Create();
texture_buffer_lut_rgba.Create();
state.texture_buffer_lut_rg.texture_buffer = texture_buffer_lut_rg.handle;
state.texture_buffer_lut_rgba.texture_buffer = texture_buffer_lut_rgba.handle;
state.Apply();
lighting_lut_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER,
sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr,
GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::LightingLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle);
// Setup the LUT for the fog
fog_lut.Create();
state.fog_lut.texture_buffer = fog_lut.handle;
state.Apply();
fog_lut_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::FogLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, fog_lut_buffer.handle);
// Setup the noise LUT for proctex
proctex_noise_lut.Create();
state.proctex_noise_lut.texture_buffer = proctex_noise_lut.handle;
state.Apply();
proctex_noise_lut_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, proctex_noise_lut_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_noise_lut_buffer.handle);
// Setup the color map for proctex
proctex_color_map.Create();
state.proctex_color_map.texture_buffer = proctex_color_map.handle;
state.Apply();
proctex_color_map_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, proctex_color_map_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_color_map_buffer.handle);
// Setup the alpha map for proctex
proctex_alpha_map.Create();
state.proctex_alpha_map.texture_buffer = proctex_alpha_map.handle;
state.Apply();
proctex_alpha_map_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, proctex_alpha_map_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, proctex_alpha_map_buffer.handle);
// Setup the LUT for proctex
proctex_lut.Create();
state.proctex_lut.texture_buffer = proctex_lut.handle;
state.Apply();
proctex_lut_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_lut_buffer.handle);
// Setup the difference LUT for proctex
proctex_diff_lut.Create();
state.proctex_diff_lut.texture_buffer = proctex_diff_lut.handle;
state.Apply();
proctex_diff_lut_buffer.Create();
glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle);
glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 4 * 256, nullptr, GL_DYNAMIC_DRAW);
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, proctex_diff_lut_buffer.handle);
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, texture_buffer.GetHandle());
glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
glTexBuffer(GL_TEXTURE_BUFFER, GL_RGBA32F, texture_buffer.GetHandle());
// Bind index buffer for hardware shader path
state.draw.vertex_array = hw_vao.handle;
@ -803,49 +744,8 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
shader_dirty = false;
}
// Sync the lighting luts
for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) {
if (uniform_block_data.lut_dirty[index]) {
SyncLightingLUT(index);
uniform_block_data.lut_dirty[index] = false;
}
}
// Sync the fog lut
if (uniform_block_data.fog_lut_dirty) {
SyncFogLUT();
uniform_block_data.fog_lut_dirty = false;
}
// Sync the proctex noise lut
if (uniform_block_data.proctex_noise_lut_dirty) {
SyncProcTexNoiseLUT();
uniform_block_data.proctex_noise_lut_dirty = false;
}
// Sync the proctex color map
if (uniform_block_data.proctex_color_map_dirty) {
SyncProcTexColorMap();
uniform_block_data.proctex_color_map_dirty = false;
}
// Sync the proctex alpha map
if (uniform_block_data.proctex_alpha_map_dirty) {
SyncProcTexAlphaMap();
uniform_block_data.proctex_alpha_map_dirty = false;
}
// Sync the proctex lut
if (uniform_block_data.proctex_lut_dirty) {
SyncProcTexLUT();
uniform_block_data.proctex_lut_dirty = false;
}
// Sync the proctex difference lut
if (uniform_block_data.proctex_diff_lut_dirty) {
SyncProcTexDiffLUT();
uniform_block_data.proctex_diff_lut_dirty = false;
}
// Sync the LUTs within the texture buffer
SyncAndUploadLUTs();
// Sync the uniform data
const bool use_gs = regs.pipeline.use_gs == Pica::PipelineRegs::UseGS::Yes;
@ -1408,7 +1308,8 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
auto& lut_config = regs.lighting.lut_config;
uniform_block_data.lut_dirty[lut_config.type] = true;
uniform_block_data.lighting_lut_dirty[lut_config.type] = true;
uniform_block_data.lighting_lut_dirty_any = true;
break;
}
}
@ -1763,21 +1664,6 @@ void RasterizerOpenGL::SyncFogColor() {
uniform_block_data.dirty = true;
}
void RasterizerOpenGL::SyncFogLUT() {
std::array<GLvec2, 128> new_data;
std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
[](const auto& entry) {
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != fog_lut_data) {
fog_lut_data = new_data;
glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle);
glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data());
}
}
void RasterizerOpenGL::SyncProcTexNoise() {
const auto& regs = Pica::g_state.regs.texturing;
uniform_block_data.data.proctex_noise_f = {
@ -1796,70 +1682,6 @@ void RasterizerOpenGL::SyncProcTexNoise() {
uniform_block_data.dirty = true;
}
// Shared worker for SyncProcTexNoiseLUT/ColorMap/AlphaMap: converts a proctex value table into
// (value, difference) float pairs and re-uploads it to `buffer` only when the result differs from
// the cached copy in `lut_data`.
static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
                                std::array<GLvec2, 128>& lut_data, GLuint buffer) {
    std::array<GLvec2, 128> converted;
    auto dest = converted.begin();
    for (const auto& entry : lut) {
        *dest++ = GLvec2{entry.ToFloat(), entry.DiffToFloat()};
    }

    // Unchanged table: keep the cache and skip the GL upload.
    if (converted == lut_data) {
        return;
    }

    lut_data = converted;
    glBindBuffer(GL_TEXTURE_BUFFER, buffer);
    glBufferSubData(GL_TEXTURE_BUFFER, 0, converted.size() * sizeof(GLvec2), converted.data());
}
void RasterizerOpenGL::SyncProcTexNoiseLUT() {
SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
proctex_noise_lut_buffer.handle);
}
void RasterizerOpenGL::SyncProcTexColorMap() {
SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
proctex_color_map_buffer.handle);
}
void RasterizerOpenGL::SyncProcTexAlphaMap() {
SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
proctex_alpha_map_buffer.handle);
}
void RasterizerOpenGL::SyncProcTexLUT() {
std::array<GLvec4, 256> new_data;
std::transform(Pica::g_state.proctex.color_table.begin(),
Pica::g_state.proctex.color_table.end(), new_data.begin(),
[](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_lut_data) {
proctex_lut_data = new_data;
glBindBuffer(GL_TEXTURE_BUFFER, proctex_lut_buffer.handle);
glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data());
}
}
void RasterizerOpenGL::SyncProcTexDiffLUT() {
std::array<GLvec4, 256> new_data;
std::transform(Pica::g_state.proctex.color_diff_table.begin(),
Pica::g_state.proctex.color_diff_table.end(), new_data.begin(),
[](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_diff_lut_data) {
proctex_diff_lut_data = new_data;
glBindBuffer(GL_TEXTURE_BUFFER, proctex_diff_lut_buffer.handle);
glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec4), new_data.data());
}
}
void RasterizerOpenGL::SyncAlphaTest() {
const auto& regs = Pica::g_state.regs;
if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
@ -1957,21 +1779,6 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
}
}
void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
std::array<GLvec2, 256> new_data;
const auto& source_lut = Pica::g_state.lighting.luts[lut_index];
std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) {
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != lighting_lut_data[lut_index]) {
lighting_lut_data[lut_index] = new_data;
glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2),
new_data.size() * sizeof(GLvec2), new_data.data());
}
}
void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
@ -2062,6 +1869,158 @@ void RasterizerOpenGL::SyncShadowBias() {
}
}
/// Syncs the lighting, fog and proctex LUTs and uploads the changed ones into texture_buffer.
/// Every uploaded table is packed back-to-back into one mapped region, and the position of each
/// table (in texels, not bytes) is written to the matching *_offset field of the uniform data so
/// the shader can add it to its texelFetch index.
void RasterizerOpenGL::SyncAndUploadLUTs() {
// Upper bound on the bytes written by one call: the size of every LUT uploaded at once.
constexpr size_t max_size = sizeof(GLvec2) * 256 * Pica::LightingRegs::NumLightingSampler +
sizeof(GLvec2) * 128 + // fog
sizeof(GLvec2) * 128 * 3 + // proctex: noise + color + alpha
sizeof(GLvec4) * 256 + // proctex
sizeof(GLvec4) * 256; // proctex diff
// Nothing is dirty: return before binding or mapping the buffer.
if (!uniform_block_data.lighting_lut_dirty_any && !uniform_block_data.fog_lut_dirty &&
!uniform_block_data.proctex_noise_lut_dirty &&
!uniform_block_data.proctex_color_map_dirty &&
!uniform_block_data.proctex_alpha_map_dirty && !uniform_block_data.proctex_lut_dirty &&
!uniform_block_data.proctex_diff_lut_dirty) {
return;
}
u8* buffer;
GLintptr offset;
bool invalidate;
// Running write cursor (in bytes) relative to the start of the mapped region.
size_t bytes_used = 0;
glBindBuffer(GL_TEXTURE_BUFFER, texture_buffer.GetHandle());
// Map() yields the write pointer, the byte offset of the mapping and an invalidate flag.
// NOTE(review): `invalidate` presumably means earlier buffer contents were discarded - confirm
// against OGLStreamBuffer::Map. Below it forces every LUT to be re-uploaded.
std::tie(buffer, offset, invalidate) = texture_buffer.Map(max_size, sizeof(GLvec4));
// Sync the lighting luts
if (uniform_block_data.lighting_lut_dirty_any || invalidate) {
for (unsigned index = 0; index < uniform_block_data.lighting_lut_dirty.size(); index++) {
if (uniform_block_data.lighting_lut_dirty[index] || invalidate) {
std::array<GLvec2, 256> new_data;
const auto& source_lut = Pica::g_state.lighting.luts[index];
std::transform(source_lut.begin(), source_lut.end(), new_data.begin(),
[](const auto& entry) {
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
});
// Upload only when the converted table differs from the cached copy (or on invalidate).
if (new_data != lighting_lut_data[index] || invalidate) {
lighting_lut_data[index] = new_data;
std::memcpy(buffer + bytes_used, new_data.data(),
new_data.size() * sizeof(GLvec2));
// Offsets are packed four per ivec4; the value is in GLvec2 texels.
uniform_block_data.data.lighting_lut_offset[index / 4][index % 4] =
(offset + bytes_used) / sizeof(GLvec2);
uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(GLvec2);
}
uniform_block_data.lighting_lut_dirty[index] = false;
}
}
}
uniform_block_data.lighting_lut_dirty_any = false;
// Sync the fog lut
if (uniform_block_data.fog_lut_dirty || invalidate) {
std::array<GLvec2, 128> new_data;
std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
[](const auto& entry) {
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != fog_lut_data || invalidate) {
fog_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
uniform_block_data.data.fog_lut_offset = (offset + bytes_used) / sizeof(GLvec2);
uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(GLvec2);
}
uniform_block_data.fog_lut_dirty = false;
}
// Helper shared by the proctex noise LUT, color map and alpha map uploads below: converts a
// value table, and if it changed (or on invalidate) appends it to the mapped region and
// stores its texel offset in `lut_offset`. `bytes_used` is captured by reference so the
// cursor keeps advancing across calls.
auto SyncProcTexValueLUT = [this, buffer, offset, invalidate, &bytes_used](
const std::array<Pica::State::ProcTex::ValueEntry, 128>& lut,
std::array<GLvec2, 128>& lut_data, GLint& lut_offset) {
std::array<GLvec2, 128> new_data;
std::transform(lut.begin(), lut.end(), new_data.begin(), [](const auto& entry) {
return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
});
if (new_data != lut_data || invalidate) {
lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec2));
lut_offset = (offset + bytes_used) / sizeof(GLvec2);
uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(GLvec2);
}
};
// Sync the proctex noise lut
if (uniform_block_data.proctex_noise_lut_dirty || invalidate) {
SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
uniform_block_data.data.proctex_noise_lut_offset);
uniform_block_data.proctex_noise_lut_dirty = false;
}
// Sync the proctex color map
if (uniform_block_data.proctex_color_map_dirty || invalidate) {
SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
uniform_block_data.data.proctex_color_map_offset);
uniform_block_data.proctex_color_map_dirty = false;
}
// Sync the proctex alpha map
if (uniform_block_data.proctex_alpha_map_dirty || invalidate) {
SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
uniform_block_data.data.proctex_alpha_map_offset);
uniform_block_data.proctex_alpha_map_dirty = false;
}
// Sync the proctex lut
if (uniform_block_data.proctex_lut_dirty || invalidate) {
std::array<GLvec4, 256> new_data;
std::transform(Pica::g_state.proctex.color_table.begin(),
Pica::g_state.proctex.color_table.end(), new_data.begin(),
[](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_lut_data || invalidate) {
proctex_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
// RGBA tables are addressed in GLvec4 texels, hence the different divisor.
uniform_block_data.data.proctex_lut_offset = (offset + bytes_used) / sizeof(GLvec4);
uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(GLvec4);
}
uniform_block_data.proctex_lut_dirty = false;
}
// Sync the proctex difference lut
if (uniform_block_data.proctex_diff_lut_dirty || invalidate) {
std::array<GLvec4, 256> new_data;
std::transform(Pica::g_state.proctex.color_diff_table.begin(),
Pica::g_state.proctex.color_diff_table.end(), new_data.begin(),
[](const auto& entry) {
auto rgba = entry.ToVector() / 255.0f;
return GLvec4{rgba.r(), rgba.g(), rgba.b(), rgba.a()};
});
if (new_data != proctex_diff_lut_data || invalidate) {
proctex_diff_lut_data = new_data;
std::memcpy(buffer + bytes_used, new_data.data(), new_data.size() * sizeof(GLvec4));
uniform_block_data.data.proctex_diff_lut_offset =
(offset + bytes_used) / sizeof(GLvec4);
uniform_block_data.dirty = true;
bytes_used += new_data.size() * sizeof(GLvec4);
}
uniform_block_data.proctex_diff_lut_dirty = false;
}
// Release the mapping, reporting how many bytes were actually written (may be zero when
// every dirty table turned out to match its cached copy).
texture_buffer.Unmap(bytes_used);
}
void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) {
// glBindBufferRange below also changes the generic buffer binding point, so we sync the state
// first

View File

@ -148,18 +148,10 @@ private:
/// Syncs the fog states to match the PICA register
void SyncFogColor();
void SyncFogLUT();
/// Sync the procedural texture noise configuration to match the PICA register
void SyncProcTexNoise();
/// Sync the procedural texture lookup tables
void SyncProcTexNoiseLUT();
void SyncProcTexColorMap();
void SyncProcTexAlphaMap();
void SyncProcTexLUT();
void SyncProcTexDiffLUT();
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
@ -190,9 +182,6 @@ private:
/// Syncs the lighting global ambient color to match the PICA register
void SyncGlobalAmbient();
/// Syncs the lighting lookup tables
void SyncLightingLUT(unsigned index);
/// Syncs the specified light's specular 0 color to match the PICA register
void SyncLightSpecular0(int light_index);
@ -220,6 +209,9 @@ private:
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Syncs and uploads the lighting, fog and proctex LUTs
void SyncAndUploadLUTs();
/// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw, bool use_gs);
@ -258,7 +250,8 @@ private:
struct {
UniformData data;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty;
std::array<bool, Pica::LightingRegs::NumLightingSampler> lighting_lut_dirty;
bool lighting_lut_dirty_any;
bool fog_lut_dirty;
bool proctex_noise_lut_dirty;
bool proctex_color_map_dirty;
@ -274,6 +267,7 @@ private:
static constexpr size_t VERTEX_BUFFER_SIZE = 32 * 1024 * 1024;
static constexpr size_t INDEX_BUFFER_SIZE = 1 * 1024 * 1024;
static constexpr size_t UNIFORM_BUFFER_SIZE = 2 * 1024 * 1024;
static constexpr size_t TEXTURE_BUFFER_SIZE = 1 * 1024 * 1024;
OGLVertexArray sw_vao; // VAO for software shader draw
OGLVertexArray hw_vao; // VAO for hardware shader / accelerate draw
@ -283,6 +277,7 @@ private:
OGLStreamBuffer vertex_buffer;
OGLStreamBuffer uniform_buffer;
OGLStreamBuffer index_buffer;
OGLStreamBuffer texture_buffer;
OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment;
size_t uniform_size_aligned_vs;
@ -291,32 +286,15 @@ private:
SamplerInfo texture_cube_sampler;
OGLBuffer lighting_lut_buffer;
OGLTexture lighting_lut;
OGLTexture texture_buffer_lut_rg;
OGLTexture texture_buffer_lut_rgba;
std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
OGLBuffer fog_lut_buffer;
OGLTexture fog_lut;
std::array<GLvec2, 128> fog_lut_data{};
OGLBuffer proctex_noise_lut_buffer;
OGLTexture proctex_noise_lut;
std::array<GLvec2, 128> proctex_noise_lut_data{};
OGLBuffer proctex_color_map_buffer;
OGLTexture proctex_color_map;
std::array<GLvec2, 128> proctex_color_map_data{};
OGLBuffer proctex_alpha_map_buffer;
OGLTexture proctex_alpha_map;
std::array<GLvec2, 128> proctex_alpha_map_data{};
OGLBuffer proctex_lut_buffer;
OGLTexture proctex_lut;
std::array<GLvec4, 256> proctex_lut_data{};
OGLBuffer proctex_diff_lut_buffer;
OGLTexture proctex_diff_lut;
std::array<GLvec4, 256> proctex_diff_lut_data{};
bool allow_shadow;

View File

@ -32,6 +32,7 @@ namespace GLShader {
static const std::string UniformBlockDef = R"(
#define NUM_TEV_STAGES 6
#define NUM_LIGHTS 8
#define NUM_LIGHTING_SAMPLERS 24
struct LightSrc {
vec3 specular_0;
@ -55,6 +56,13 @@ layout (std140) uniform shader_data {
int scissor_y1;
int scissor_x2;
int scissor_y2;
int fog_lut_offset;
int proctex_noise_lut_offset;
int proctex_color_map_offset;
int proctex_alpha_map_offset;
int proctex_lut_offset;
int proctex_diff_lut_offset;
ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
vec3 fog_color;
vec2 proctex_noise_f;
vec2 proctex_noise_a;
@ -1017,7 +1025,7 @@ void AppendProcTexClamp(std::string& out, const std::string& var, ProcTexClamp m
}
void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner,
const std::string& map_lut) {
const std::string& offset) {
std::string combined;
switch (combiner) {
case ProcTexCombiner::U:
@ -1055,7 +1063,7 @@ void AppendProcTexCombineAndMap(std::string& out, ProcTexCombiner combiner,
combined = "0.0";
break;
}
out += "ProcTexLookupLUT(" + map_lut + ", " + combined + ")";
out += "ProcTexLookupLUT(" + offset + ", " + combined + ")";
}
void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) {
@ -1064,12 +1072,12 @@ void AppendProcTexSampler(std::string& out, const PicaFSConfig& config) {
// coord=1.0 is lut[127]+lut_diff[127]. For other indices, the result is interpolated using
// value entries and difference entries.
out += R"(
float ProcTexLookupLUT(samplerBuffer lut, float coord) {
float ProcTexLookupLUT(int offset, float coord) {
coord *= 128;
float index_i = clamp(floor(coord), 0.0, 127.0);
float index_f = coord - index_i; // fract() cannot be used here because 128.0 needs to be
// extracted as index_i = 127.0 and index_f = 1.0
vec2 entry = texelFetch(lut, int(index_i)).rg;
vec2 entry = texelFetch(texture_buffer_lut_rg, int(index_i) + offset).rg;
return clamp(entry.r + entry.g * index_f, 0.0, 1.0);
}
)";
@ -1105,8 +1113,8 @@ float ProcTexNoiseCoef(vec2 x) {
float g2 = ProcTexNoiseRand2D(point + vec2(0.0, 1.0)) * (frac.x + frac.y - 1.0);
float g3 = ProcTexNoiseRand2D(point + vec2(1.0, 1.0)) * (frac.x + frac.y - 2.0);
float x_noise = ProcTexLookupLUT(proctex_noise_lut, frac.x);
float y_noise = ProcTexLookupLUT(proctex_noise_lut, frac.y);
float x_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.x);
float y_noise = ProcTexLookupLUT(proctex_noise_lut_offset, frac.y);
float x0 = mix(g0, g1, x_noise);
float x1 = mix(g2, g3, x_noise);
return mix(x0, x1, y_noise);
@ -1148,7 +1156,8 @@ float ProcTexNoiseCoef(vec2 x) {
// Combine and map
out += "float lut_coord = ";
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner, "proctex_color_map");
AppendProcTexCombineAndMap(out, config.state.proctex.color_combiner,
"proctex_color_map_offset");
out += ";\n";
// Look up color
@ -1162,14 +1171,17 @@ float ProcTexNoiseCoef(vec2 x) {
out += "int lut_index_i = int(lut_coord) + " +
std::to_string(config.state.proctex.lut_offset) + ";\n";
out += "float lut_index_f = fract(lut_coord);\n";
out += "vec4 final_color = texelFetch(proctex_lut, lut_index_i) + lut_index_f * "
"texelFetch(proctex_diff_lut, lut_index_i);\n";
out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, lut_index_i + "
"proctex_lut_offset) + "
"lut_index_f * "
"texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n";
break;
case ProcTexFilter::Nearest:
case ProcTexFilter::NearestMipmapLinear:
case ProcTexFilter::NearestMipmapNearest:
out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n";
out += "vec4 final_color = texelFetch(proctex_lut, int(round(lut_coord)));\n";
out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + "
"proctex_lut_offset);\n";
break;
}
@ -1177,7 +1189,8 @@ float ProcTexNoiseCoef(vec2 x) {
// Note: in separate alpha mode, the alpha channel skips the color LUT look up stage. It
// uses the output of CombineAndMap directly instead.
out += "float final_alpha = ";
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner, "proctex_alpha_map");
AppendProcTexCombineAndMap(out, config.state.proctex.alpha_combiner,
"proctex_alpha_map_offset");
out += ";\n";
out += "return vec4(final_color.xyz, final_alpha);\n}\n";
} else {
@ -1210,13 +1223,8 @@ uniform sampler2D tex0;
uniform sampler2D tex1;
uniform sampler2D tex2;
uniform samplerCube tex_cube;
uniform samplerBuffer lighting_lut;
uniform samplerBuffer fog_lut;
uniform samplerBuffer proctex_noise_lut;
uniform samplerBuffer proctex_color_map;
uniform samplerBuffer proctex_alpha_map;
uniform samplerBuffer proctex_lut;
uniform samplerBuffer proctex_diff_lut;
uniform samplerBuffer texture_buffer_lut_rg;
uniform samplerBuffer texture_buffer_lut_rgba;
#if ALLOW_SHADOW
layout(r32ui) uniform readonly uimage2D shadow_texture_px;
@ -1238,7 +1246,7 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
}
float LookupLightingLUT(int lut_index, int index, float delta) {
vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg;
vec2 entry = texelFetch(texture_buffer_lut_rg, lighting_lut_offset[lut_index >> 2][lut_index & 3] + index).rg;
return entry.r + entry.g * delta;
}
@ -1481,7 +1489,8 @@ vec4 secondary_fragment_color = vec4(0.0);
// Generate clamped fog factor from LUT for given fog index
out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
out += "float fog_f = fog_index - fog_i;\n";
out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i)).rg;\n";
out += "vec2 fog_lut_entry = texelFetch(texture_buffer_lut_rg, int(fog_i) + "
"fog_lut_offset).rg;\n";
out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n";
out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";

View File

@ -55,13 +55,8 @@ static void SetShaderSamplerBindings(GLuint shader) {
SetShaderSamplerBinding(shader, "tex_cube", TextureUnits::TextureCube);
// Set the texture samplers to correspond to different lookup table texture units
SetShaderSamplerBinding(shader, "lighting_lut", TextureUnits::LightingLUT);
SetShaderSamplerBinding(shader, "fog_lut", TextureUnits::FogLUT);
SetShaderSamplerBinding(shader, "proctex_noise_lut", TextureUnits::ProcTexNoiseLUT);
SetShaderSamplerBinding(shader, "proctex_color_map", TextureUnits::ProcTexColorMap);
SetShaderSamplerBinding(shader, "proctex_alpha_map", TextureUnits::ProcTexAlphaMap);
SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT);
SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rg", TextureUnits::TextureBufferLUT_RG);
SetShaderSamplerBinding(shader, "texture_buffer_lut_rgba", TextureUnits::TextureBufferLUT_RGBA);
SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer);
SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX);

View File

@ -6,6 +6,7 @@
#include <memory>
#include <glad/glad.h>
#include "video_core/regs_lighting.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/pica_to_gl.h"
@ -38,6 +39,13 @@ struct UniformData {
GLint scissor_y1;
GLint scissor_x2;
GLint scissor_y2;
GLint fog_lut_offset;
GLint proctex_noise_lut_offset;
GLint proctex_color_map_offset;
GLint proctex_alpha_map_offset;
GLint proctex_lut_offset;
GLint proctex_diff_lut_offset;
alignas(16) GLivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
alignas(16) GLvec3 fog_color;
alignas(8) GLvec2 proctex_noise_f;
alignas(8) GLvec2 proctex_noise_a;
@ -50,7 +58,7 @@ struct UniformData {
};
static_assert(
sizeof(UniformData) == 0x470,
sizeof(UniformData) == 0x4e0,
"The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec");

View File

@ -55,15 +55,8 @@ OpenGLState::OpenGLState() {
texture_cube_unit.texture_cube = 0;
texture_cube_unit.sampler = 0;
lighting_lut.texture_buffer = 0;
fog_lut.texture_buffer = 0;
proctex_lut.texture_buffer = 0;
proctex_diff_lut.texture_buffer = 0;
proctex_color_map.texture_buffer = 0;
proctex_alpha_map.texture_buffer = 0;
proctex_noise_lut.texture_buffer = 0;
texture_buffer_lut_rg.texture_buffer = 0;
texture_buffer_lut_rgba.texture_buffer = 0;
image_shadow_buffer = 0;
image_shadow_texture_px = 0;
@ -221,46 +214,17 @@ void OpenGLState::Apply() const {
glBindSampler(TextureUnits::TextureCube.id, texture_cube_unit.sampler);
}
// Lighting LUTs
if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
glActiveTexture(TextureUnits::LightingLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
// Texture buffer LUTs
if (texture_buffer_lut_rg.texture_buffer != cur_state.texture_buffer_lut_rg.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_RG.Enum());
glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rg.texture_buffer);
}
// Fog LUT
if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) {
glActiveTexture(TextureUnits::FogLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer);
}
// ProcTex Noise LUT
if (proctex_noise_lut.texture_buffer != cur_state.proctex_noise_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_noise_lut.texture_buffer);
}
// ProcTex Color Map
if (proctex_color_map.texture_buffer != cur_state.proctex_color_map.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_color_map.texture_buffer);
}
// ProcTex Alpha Map
if (proctex_alpha_map.texture_buffer != cur_state.proctex_alpha_map.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_alpha_map.texture_buffer);
}
// ProcTex LUT
if (proctex_lut.texture_buffer != cur_state.proctex_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_lut.texture_buffer);
}
// ProcTex Diff LUT
if (proctex_diff_lut.texture_buffer != cur_state.proctex_diff_lut.texture_buffer) {
glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer);
// Texture buffer LUTs
if (texture_buffer_lut_rgba.texture_buffer !=
cur_state.texture_buffer_lut_rgba.texture_buffer) {
glActiveTexture(TextureUnits::TextureBufferLUT_RGBA.Enum());
glBindTexture(GL_TEXTURE_BUFFER, texture_buffer_lut_rgba.texture_buffer);
}
// Shadow Images
@ -374,20 +338,10 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
}
if (texture_cube_unit.texture_cube == handle)
texture_cube_unit.texture_cube = 0;
if (lighting_lut.texture_buffer == handle)
lighting_lut.texture_buffer = 0;
if (fog_lut.texture_buffer == handle)
fog_lut.texture_buffer = 0;
if (proctex_noise_lut.texture_buffer == handle)
proctex_noise_lut.texture_buffer = 0;
if (proctex_color_map.texture_buffer == handle)
proctex_color_map.texture_buffer = 0;
if (proctex_alpha_map.texture_buffer == handle)
proctex_alpha_map.texture_buffer = 0;
if (proctex_lut.texture_buffer == handle)
proctex_lut.texture_buffer = 0;
if (proctex_diff_lut.texture_buffer == handle)
proctex_diff_lut.texture_buffer = 0;
if (texture_buffer_lut_rg.texture_buffer == handle)
texture_buffer_lut_rg.texture_buffer = 0;
if (texture_buffer_lut_rgba.texture_buffer == handle)
texture_buffer_lut_rgba.texture_buffer = 0;
if (image_shadow_buffer == handle)
image_shadow_buffer = 0;
if (image_shadow_texture_px == handle)

View File

@ -20,14 +20,9 @@ constexpr TextureUnit PicaTexture(int unit) {
return TextureUnit{unit};
}
constexpr TextureUnit LightingLUT{3};
constexpr TextureUnit FogLUT{4};
constexpr TextureUnit ProcTexNoiseLUT{5};
constexpr TextureUnit ProcTexColorMap{6};
constexpr TextureUnit ProcTexAlphaMap{7};
constexpr TextureUnit ProcTexLUT{8};
constexpr TextureUnit ProcTexDiffLUT{9};
constexpr TextureUnit TextureCube{10};
constexpr TextureUnit TextureCube{3};
constexpr TextureUnit TextureBufferLUT_RG{4};
constexpr TextureUnit TextureBufferLUT_RGBA{5};
} // namespace TextureUnits
@ -105,31 +100,11 @@ public:
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} lighting_lut;
} texture_buffer_lut_rg;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} fog_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_noise_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_color_map;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_alpha_map;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_lut;
struct {
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_diff_lut;
} texture_buffer_lut_rgba;
// GL_IMAGE_BINDING_NAME
GLuint image_shadow_buffer;

View File

@ -87,7 +87,7 @@ std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr a
void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);
if (!coherent) {
if (!coherent && size > 0) {
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}

View File

@ -23,6 +23,10 @@ using GLuvec2 = std::array<GLuint, 2>;
using GLuvec3 = std::array<GLuint, 3>;
using GLuvec4 = std::array<GLuint, 4>;
using GLivec2 = std::array<GLint, 2>;
using GLivec3 = std::array<GLint, 3>;
using GLivec4 = std::array<GLint, 4>;
namespace PicaToGL {
inline GLenum TextureFilterMode(Pica::TexturingRegs::TextureConfig::TextureFilter mode) {