From 675f23aedc9a3a99925068e952cbcb3faf88296a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Wed, 18 Sep 2019 01:07:01 -0300
Subject: [PATCH] shader/image: Implement SULD and remove irrelevant code

* Implement SULD as float.
* Remove conditional declaration of GL_ARB_shader_viewport_layer_array.
---
 src/video_core/engines/shader_bytecode.h      |  2 +-
 src/video_core/renderer_opengl/gl_device.cpp  | 22 ++++++
 src/video_core/renderer_opengl/gl_device.h    |  5 ++
 .../renderer_opengl/gl_shader_cache.cpp       | 16 ++--
 .../renderer_opengl/gl_shader_decompiler.cpp  | 17 ++++-
 .../renderer_opengl/gl_shader_decompiler.h    |  1 -
 .../renderer_opengl/gl_shader_disk_cache.cpp  | 10 ---
 .../renderer_vulkan/vk_shader_decompiler.cpp  |  7 ++
 src/video_core/shader/decode/image.cpp        | 73 +++++++++++++------
 src/video_core/shader/node.h                  |  4 +-
 10 files changed, 110 insertions(+), 47 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 12fb8abb7..81dfe33a5 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -1427,7 +1427,7 @@ union Instruction {
             ASSERT(mode == SurfaceDataMode::D_BA);
             return store_data_layout;
         }
-    } sust;
+    } suldst;
 
     union {
         BitField<28, 1, u64> is_ba;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 4f59a87b4..64de7e425 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -2,8 +2,10 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <array>
 #include <cstddef>
+#include <vector>
 #include <glad/glad.h>
 
 #include "common/logging/log.h"
@@ -30,9 +32,27 @@ bool TestProgram(const GLchar* glsl) {
     return link_status == GL_TRUE;
 }
 
+std::vector<std::string_view> GetExtensions() {
+    GLint num_extensions{}; // Stays zero if the query fails, yielding an empty list
+    glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
+    std::vector<std::string_view> extensions;
+    extensions.reserve(num_extensions);
+    for (GLint index = 0; index < num_extensions; ++index) {
+        extensions.push_back(
+            reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, static_cast<GLuint>(index))));
+    }
+    return extensions;
+}
+
+bool HasExtension(const std::vector<std::string_view>& extensions, std::string_view extension) {
+    return std::find(extensions.begin(), extensions.end(), extension) != extensions.end();
+}
+
 } // Anonymous namespace
 
 Device::Device() {
+    const std::vector extensions = GetExtensions();
+
     uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
@@ -40,6 +60,7 @@ Device::Device() {
     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                           GLAD_GL_NV_shader_thread_shuffle;
     has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
+    has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = TestComponentIndexingBug();
     has_precise_bug = TestPreciseBug();
@@ -55,6 +76,7 @@ Device::Device(std::nullptr_t) {
     max_varyings = 15;
     has_warp_intrinsics = true;
     has_vertex_viewport_layer = true;
+    has_image_load_formatted = true;
     has_variable_aoffi = true;
     has_component_indexing_bug = false;
     has_precise_bug = false;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index ba6dcd3be..bb273c3d6 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -38,6 +38,10 @@ public:
         return has_vertex_viewport_layer;
     }
 
+    bool HasImageLoadFormatted() const {
+        return has_image_load_formatted;
+    }
+
     bool HasVariableAoffi() const {
         return has_variable_aoffi;
     }
@@ -61,6 +65,7 @@ private:
     u32 max_varyings{};
     bool has_warp_intrinsics{};
     bool has_vertex_viewport_layer{};
+    bool has_image_load_formatted{};
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 0dbc4c02f..42ca3b1bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -211,14 +211,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
     const auto primitive_mode{variant.primitive_mode};
     const auto texture_buffer_usage{variant.texture_buffer_usage};
 
-    std::string source = "#version 430 core\n"
-                         "#extension GL_ARB_separate_shader_objects : enable\n"
-                         "#extension GL_NV_gpu_shader5 : enable\n"
-                         "#extension GL_NV_shader_thread_group : enable\n"
-                         "#extension GL_NV_shader_thread_shuffle : enable\n";
-    if (entries.shader_viewport_layer_array) {
-        source += "#extension GL_ARB_shader_viewport_layer_array : enable\n";
-    }
+    std::string source = R"(#version 430 core
+#extension GL_ARB_separate_shader_objects : enable
+#extension GL_ARB_shader_viewport_layer_array : enable
+#extension GL_EXT_shader_image_load_formatted : enable
+#extension GL_NV_gpu_shader5 : enable
+#extension GL_NV_shader_thread_group : enable
+#extension GL_NV_shader_thread_shuffle : enable
+)";
     if (program_type == ProgramType::Compute) {
         source += "#extension GL_ARB_compute_variable_group_size : require\n";
     }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 76439e7ab..70ce6572b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -19,6 +19,7 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace OpenGL::GLShader {
@@ -398,8 +399,6 @@ public:
                                                        usage.is_read, usage.is_written);
         }
         entries.clip_distances = ir.GetClipDistances();
-        entries.shader_viewport_layer_array =
-            IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex());
         entries.shader_length = ir.GetLength();
         return entries;
     }
@@ -1801,6 +1800,19 @@ private:
         return {tmp, Type::Float};
     }
 
+    Expression ImageLoad(Operation operation) {
+        if (!device.HasImageLoadFormatted()) {
+            LOG_ERROR(Render_OpenGL,
+                      "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load");
+            return {"0", Type::Int};
+        }
+
+        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
+                            BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
+                Type::Float};
+    }
+
     Expression ImageStore(Operation operation) {
         const auto meta{std::get<MetaImage>(operation.GetMeta())};
         code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
@@ -2164,6 +2176,7 @@ private:
         &GLSLDecompiler::TextureQueryLod,
         &GLSLDecompiler::TexelFetch,
 
+        &GLSLDecompiler::ImageLoad,
         &GLSLDecompiler::ImageStore,
         &GLSLDecompiler::AtomicImageAdd,
         &GLSLDecompiler::AtomicImageMin,
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 2ea02f5bf..e538dc001 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -90,7 +90,6 @@ struct ShaderEntries {
     std::vector<ImageEntry> images;
     std::vector<GlobalMemoryEntry> global_memory_entries;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
-    bool shader_viewport_layer_array{};
     std::size_t shader_length{};
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index f141c4e3b..02b4dd234 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -382,12 +382,6 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
         }
     }
 
-    bool shader_viewport_layer_array{};
-    if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) {
-        return {};
-    }
-    entry.entries.shader_viewport_layer_array = shader_viewport_layer_array;
-
     u64 shader_length{};
     if (!LoadObjectFromPrecompiled(shader_length)) {
         return {};
@@ -464,10 +458,6 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std:
         }
     }
 
-    if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) {
-        return false;
-    }
-
     if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
         return false;
     }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index f7fbbb6e4..9d31bff43 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -19,6 +19,7 @@
 #include "video_core/engines/shader_header.h"
 #include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
+#include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace Vulkan::VKShader {
@@ -939,6 +940,11 @@ private:
         return {};
     }
 
+    Id ImageLoad(Operation operation) {
+        UNIMPLEMENTED();
+        return {};
+    }
+
     Id ImageStore(Operation operation) {
         UNIMPLEMENTED();
         return {};
@@ -1440,6 +1446,7 @@ private:
         &SPIRVDecompiler::TextureQueryLod,
         &SPIRVDecompiler::TexelFetch,
 
+        &SPIRVDecompiler::ImageLoad,
         &SPIRVDecompiler::ImageStore,
         &SPIRVDecompiler::AtomicImageAdd,
         &SPIRVDecompiler::AtomicImageMin,
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index d54fb88c9..e611f9f3b 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -41,11 +41,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
+    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
+        std::vector<Node> coords;
+        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
+        coords.reserve(num_coords);
+        for (std::size_t i = 0; i < num_coords; ++i) {
+            coords.push_back(GetRegister(instr.gpr8.Value() + i));
+        }
+        return coords;
+    };
+
     switch (opcode->get().GetId()) {
+    case OpCode::Id::SULD: {
+        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
+        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
+                         Tegra::Shader::OutOfBoundsStore::Ignore);
+
+        const auto type{instr.suldst.image_type};
+        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
+                                              : GetBindlessImage(instr.gpr39, type)};
+        image.MarkRead();
+
+        u32 indexer = 0;
+        for (u32 element = 0; element < 4; ++element) {
+            if (!instr.suldst.IsComponentEnabled(element)) {
+                continue;
+            }
+            MetaImage meta{image, {}, element};
+            Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
+            SetTemporary(bb, indexer++, std::move(value));
+        }
+        for (u32 i = 0; i < indexer; ++i) {
+            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
+        }
+        break;
+    }
     case OpCode::Id::SUST: {
-        UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
-        UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
-        UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store
+        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
+        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
+                         Tegra::Shader::OutOfBoundsStore::Ignore);
+        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
 
         std::vector<Node> values;
         constexpr std::size_t hardcoded_size{4};
@@ -53,32 +88,18 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
             values.push_back(GetRegister(instr.gpr0.Value() + i));
         }
 
-        std::vector<Node> coords;
-        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
-        for (std::size_t i = 0; i < num_coords; ++i) {
-            coords.push_back(GetRegister(instr.gpr8.Value() + i));
-        }
-
-        const auto type{instr.sust.image_type};
-        auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
-                                            : GetBindlessImage(instr.gpr39, type)};
+        const auto type{instr.suldst.image_type};
+        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
+                                              : GetBindlessImage(instr.gpr39, type)};
         image.MarkWrite();
 
-        MetaImage meta{image, values};
-        bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords)));
+        MetaImage meta{image, std::move(values)};
+        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
         break;
     }
     case OpCode::Id::SUATOM: {
         UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
 
-        Node value = GetRegister(instr.gpr0);
-
-        std::vector<Node> coords;
-        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
-        for (std::size_t i = 0; i < num_coords; ++i) {
-            coords.push_back(GetRegister(instr.gpr8.Value() + i));
-        }
-
         const OperationCode operation_code = [instr] {
             switch (instr.suatom_d.operation) {
             case Tegra::Shader::ImageAtomicOperation::Add:
@@ -102,9 +123,13 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
             }
         }();
 
-        const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)};
+        Node value = GetRegister(instr.gpr0);
+
+        const auto type = instr.suatom_d.image_type;
+        const auto& image{GetImage(instr.image, type, instr.suatom_d.size)};
+
         MetaImage meta{image, {std::move(value)}};
-        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords)));
+        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
         break;
     }
     default:
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index abf2cb1ab..e5b75783d 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -149,7 +149,8 @@ enum class OperationCode {
     TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
     TexelFetch,             /// (MetaTexture, int[N], int) -> float4
 
-    ImageStore,          /// (MetaImage, int[N] values) -> void
+    ImageLoad,           /// (MetaImage, int[N] coords) -> float4
+    ImageStore,          /// (MetaImage, int[N] coords) -> void
     AtomicImageAdd,      /// (MetaImage, int[N] coords) -> void
     AtomicImageMin,      /// (MetaImage, int[N] coords) -> void
     AtomicImageMax,      /// (MetaImage, int[N] coords) -> void
@@ -402,6 +403,7 @@ struct MetaTexture {
 struct MetaImage {
     const Image& image;
     std::vector<Node> values;
+    u32 element{};
 };
 
 /// Parameters that modify an operation but are not part of any particular operand