From 2927c88fd34b131a3673922eb12ba2c337df8c13 Mon Sep 17 00:00:00 2001
From: wwylele <wwylele@gmail.com>
Date: Wed, 4 Jul 2018 17:16:38 +0300
Subject: [PATCH] gl_rasterizer: implement mipmap for proctex

---
 src/video_core/regs_texturing.h               |  9 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 11 +++
 .../renderer_opengl/gl_rasterizer.h           |  3 +
 .../renderer_opengl/gl_shader_gen.cpp         | 85 ++++++++++++++-----
 .../renderer_opengl/gl_shader_gen.h           |  7 +-
 .../renderer_opengl/gl_shader_manager.h       |  3 +-
 src/video_core/swrasterizer/proctex.cpp       |  2 +-
 7 files changed, 97 insertions(+), 23 deletions(-)

diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
index ff458cc57..7f4ac3ccf 100644
--- a/src/video_core/regs_texturing.h
+++ b/src/video_core/regs_texturing.h
@@ -251,11 +251,18 @@ struct TexturingRegs {
 
     union {
         BitField<0, 3, ProcTexFilter> filter;
+        BitField<3, 4, u32> lod_min;
+        BitField<7, 4, u32> lod_max;
         BitField<11, 8, u32> width;
         BitField<19, 8, u32> bias_high; // TODO: unimplemented
     } proctex_lut;
 
-    BitField<0, 8, u32> proctex_lut_offset;
+    union {
+        BitField<0, 8, u32> level0;
+        BitField<8, 8, u32> level1;
+        BitField<16, 8, u32> level2;
+        BitField<24, 8, u32> level3;
+    } proctex_lut_offset;
 
     INSERT_PADDING_WORDS(0x1);
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index fb876a3d1..96c675f8c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -186,6 +186,7 @@ void RasterizerOpenGL::SyncEntireState() {
 
     SyncFogColor();
     SyncProcTexNoise();
+    SyncProcTexBias();
     SyncShadowBias();
 }
 
@@ -894,6 +895,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
     case PICA_REG_INDEX(texturing.proctex):
     case PICA_REG_INDEX(texturing.proctex_lut):
     case PICA_REG_INDEX(texturing.proctex_lut_offset):
+        SyncProcTexBias();
         shader_dirty = true;
         break;
 
@@ -1681,6 +1683,15 @@ void RasterizerOpenGL::SyncProcTexNoise() {
     uniform_block_data.dirty = true;
 }
 
+void RasterizerOpenGL::SyncProcTexBias() {
+    const auto& regs = Pica::g_state.regs.texturing;
+    // Reassemble the raw float16 bias: low bits from proctex, bits 8 and up from proctex_lut
+    uniform_block_data.data.proctex_bias =
+        Pica::float16::FromRaw(regs.proctex.bias_low | (regs.proctex_lut.bias_high << 8))
+            .ToFloat32();
+    uniform_block_data.dirty = true;
+}
+
 void RasterizerOpenGL::SyncAlphaTest() {
     const auto& regs = Pica::g_state.regs;
     if (regs.framebuffer.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2753ddb79..013a4a5e7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -152,6 +152,9 @@ private:
     /// Sync the procedural texture noise configuration to match the PICA register
     void SyncProcTexNoise();
 
+    /// Sync the procedural texture bias configuration to match the PICA register
+    void SyncProcTexBias();
+
     /// Syncs the alpha test states to match the PICA register
     void SyncAlphaTest();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 1b13136cb..b76d8c54c 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -62,6 +62,7 @@ layout (std140) uniform shader_data {
     int proctex_alpha_map_offset;
     int proctex_lut_offset;
     int proctex_diff_lut_offset;
+    float proctex_bias;
     ivec4 lighting_lut_offset[NUM_LIGHTING_SAMPLERS / 4];
     vec3 fog_color;
     vec2 proctex_noise_f;
@@ -226,7 +227,12 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
         state.proctex.u_shift = regs.texturing.proctex.u_shift;
         state.proctex.v_shift = regs.texturing.proctex.v_shift;
         state.proctex.lut_width = regs.texturing.proctex_lut.width;
-        state.proctex.lut_offset = regs.texturing.proctex_lut_offset;
+        state.proctex.lut_offset0 = regs.texturing.proctex_lut_offset.level0;
+        state.proctex.lut_offset1 = regs.texturing.proctex_lut_offset.level1;
+        state.proctex.lut_offset2 = regs.texturing.proctex_lut_offset.level2;
+        state.proctex.lut_offset3 = regs.texturing.proctex_lut_offset.level3;
+        state.proctex.lod_min = regs.texturing.proctex_lut.lod_min;
+        state.proctex.lod_max = regs.texturing.proctex_lut.lod_max;
         state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
     }
 
@@ -1122,6 +1128,42 @@ float ProcTexNoiseCoef(vec2 x) {
         )";
     }
 
+    out += "vec4 SampleProcTexColor(float lut_coord, int level) {\n";
+    out += "int lut_width = " + std::to_string(config.state.proctex.lut_width) + " >> level;\n";
+    std::string offset0 = std::to_string(config.state.proctex.lut_offset0);
+    std::string offset1 = std::to_string(config.state.proctex.lut_offset1);
+    std::string offset2 = std::to_string(config.state.proctex.lut_offset2);
+    std::string offset3 = std::to_string(config.state.proctex.lut_offset3);
+    // Offsets for levels 4-7 seem to be hardcoded; only levels 0-3 have register fields
+    out += "int lut_offsets[8] = int[](" + offset0 + ", " + offset1 + ", " + offset2 + ", " +
+           offset3 + ", 0xF0, 0xF8, 0xFC, 0xFE);\n";
+    out += "int lut_offset = lut_offsets[level];\n";
+    // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
+    out += "lut_coord *= lut_width - 1;\n";
+
+    switch (config.state.proctex.lut_filter) {
+    case ProcTexFilter::Linear:
+    case ProcTexFilter::LinearMipmapLinear:
+    case ProcTexFilter::LinearMipmapNearest:
+        out += "int lut_index_i = int(lut_coord) + lut_offset;\n";
+        out += "float lut_index_f = fract(lut_coord);\n";
+        out += "return texelFetch(texture_buffer_lut_rgba, lut_index_i + "
+               "proctex_lut_offset) + "
+               "lut_index_f * "
+               "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n";
+        break;
+    case ProcTexFilter::Nearest:
+    case ProcTexFilter::NearestMipmapLinear:
+    case ProcTexFilter::NearestMipmapNearest:
+        out += "lut_coord += lut_offset;\n";
+        // Note: float->int conversion here is indeed floor, not round
+        out += "return texelFetch(texture_buffer_lut_rgba, int(lut_coord) + "
+               "proctex_lut_offset);\n";
+        break;
+    }
+
+    out += "}\n";
+
     out += "vec4 ProcTex() {\n";
     if (config.state.proctex.coord < 3) {
         out += "vec2 uv = abs(texcoord" + std::to_string(config.state.proctex.coord) + ");\n";
@@ -1130,6 +1172,18 @@ float ProcTexNoiseCoef(vec2 x) {
         out += "vec2 uv = abs(texcoord0);\n";
     }
 
+    // This LOD formula is the same as the LOD upper limit defined in OpenGL.
+    // f(x, y) <= m_u + m_v + m_w
+    // (See OpenGL 4.6 spec, 8.14.1 - Scale Factor and Level-of-Detail)
+    // Note: this is different from the one normal 2D textures use.
+    out += "vec2 duv = max(abs(dFdx(uv)), abs(dFdy(uv)));\n";
+    // Unlike normal textures, the bias is inside the log2
+    out += "float lod = log2(abs(" + std::to_string(config.state.proctex.lut_width) +
+           " * proctex_bias) * (duv.x + duv.y));\n";
+    out += "if (proctex_bias == 0.0) lod = 0.0;\n";
+    // Format bounds as integers: std::to_string(float) honors the C locale's decimal point
+    out += "lod = clamp(lod, " + std::to_string(config.state.proctex.lod_min) + ".0, " +
+           std::to_string(std::min<u32>(7, config.state.proctex.lod_max)) + ".0);\n";
     // Get shift offset before noise generation
     out += "float u_shift = ";
     AppendProcTexShiftOffset(out, "uv.y", config.state.proctex.u_shift,
@@ -1160,28 +1214,21 @@ float ProcTexNoiseCoef(vec2 x) {
                                "proctex_color_map_offset");
     out += ";\n";
 
-    // Look up color
-    // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
-    out += "lut_coord *= " + std::to_string(config.state.proctex.lut_width - 1) + ";\n";
-    // TODO(wwylele): implement mipmap
     switch (config.state.proctex.lut_filter) {
     case ProcTexFilter::Linear:
-    case ProcTexFilter::LinearMipmapLinear:
-    case ProcTexFilter::LinearMipmapNearest:
-        out += "int lut_index_i = int(lut_coord) + " +
-               std::to_string(config.state.proctex.lut_offset) + ";\n";
-        out += "float lut_index_f = fract(lut_coord);\n";
-        out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, lut_index_i + "
-               "proctex_lut_offset) + "
-               "lut_index_f * "
-               "texelFetch(texture_buffer_lut_rgba, lut_index_i + proctex_diff_lut_offset);\n";
-        break;
     case ProcTexFilter::Nearest:
-    case ProcTexFilter::NearestMipmapLinear:
+        out += "vec4 final_color = SampleProcTexColor(lut_coord, 0);\n";
+        break;
     case ProcTexFilter::NearestMipmapNearest:
-        out += "lut_coord += " + std::to_string(config.state.proctex.lut_offset) + ";\n";
-        out += "vec4 final_color = texelFetch(texture_buffer_lut_rgba, int(round(lut_coord)) + "
-               "proctex_lut_offset);\n";
+    case ProcTexFilter::LinearMipmapNearest:
+        out += "vec4 final_color = SampleProcTexColor(lut_coord, int(round(lod)));\n";
+        break;
+    case ProcTexFilter::NearestMipmapLinear:
+    case ProcTexFilter::LinearMipmapLinear:
+        out += "int lod_i = int(lod);\n";
+        out += "float lod_f = fract(lod);\n";
+        out += "vec4 final_color = mix(SampleProcTexColor(lut_coord, lod_i), "
+               "SampleProcTexColor(lut_coord, lod_i + 1), lod_f);\n";
         break;
     }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index e3f3fd5bd..0467fa8d9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -107,7 +107,12 @@ struct PicaFSConfigState {
         bool noise_enable;
         Pica::TexturingRegs::ProcTexShift u_shift, v_shift;
         u32 lut_width;
-        u32 lut_offset;
+        u32 lut_offset0;
+        u32 lut_offset1;
+        u32 lut_offset2;
+        u32 lut_offset3;
+        u32 lod_min;
+        u32 lod_max;
         Pica::TexturingRegs::ProcTexFilter lut_filter;
     } proctex;
 
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 3233f99e7..42f1fecf5 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -45,6 +45,7 @@ struct UniformData {
     GLint proctex_alpha_map_offset;
     GLint proctex_lut_offset;
     GLint proctex_diff_lut_offset;
+    GLfloat proctex_bias;
     alignas(16) GLivec4 lighting_lut_offset[Pica::LightingRegs::NumLightingSampler / 4];
     alignas(16) GLvec3 fog_color;
     alignas(8) GLvec2 proctex_noise_f;
@@ -58,7 +59,7 @@ struct UniformData {
 };
 
 static_assert(
-    sizeof(UniformData) == 0x4e0,
+    sizeof(UniformData) == 0x4F0,
     "The size of the UniformData structure has changed, update the structure in the shader");
 static_assert(sizeof(UniformData) < 16384,
               "UniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/swrasterizer/proctex.cpp b/src/video_core/swrasterizer/proctex.cpp
index ba24d9730..d4f47a841 100644
--- a/src/video_core/swrasterizer/proctex.cpp
+++ b/src/video_core/swrasterizer/proctex.cpp
@@ -185,7 +185,7 @@ Math::Vec4<u8> ProcTex(float u, float v, TexturingRegs regs, State::ProcTex stat
 
     // Look up the color
     // For the color lut, coord=0.0 is lut[offset] and coord=1.0 is lut[offset+width-1]
-    const u32 offset = regs.proctex_lut_offset;
+    const u32 offset = regs.proctex_lut_offset.level0;
     const u32 width = regs.proctex_lut.width;
     const float index = offset + (lut_coord * (width - 1));
     Math::Vec4<u8> final_color;