shader: Remove old shader management

2021-02-16 20:52:12 -03:00 · 2021-02-16 20:52:12 -03:00 · c67d64365a
commit c67d64365a
parent 58914796c0
83 changed files with 57 additions and 19625 deletions
--- a/CMakeModules/GenerateSCMRev.cmake
+++ b/CMakeModules/GenerateSCMRev.cmake
@ -51,61 +51,7 @@ endif()
 # The variable SRC_DIR must be passed into the script (since it uses the current build directory for all values of CMAKE_*_DIR)
 set(VIDEO_CORE "${SRC_DIR}/src/video_core")
 set(HASH_FILES
-    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+    # ...
    "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
    "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
    "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
    "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
    "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
    "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
    "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
    "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
    "${VIDEO_CORE}/shader/decode/bfe.cpp"
    "${VIDEO_CORE}/shader/decode/bfi.cpp"
    "${VIDEO_CORE}/shader/decode/conversion.cpp"
    "${VIDEO_CORE}/shader/decode/ffma.cpp"
    "${VIDEO_CORE}/shader/decode/float_set.cpp"
    "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/half_set.cpp"
    "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/hfma2.cpp"
    "${VIDEO_CORE}/shader/decode/image.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set.cpp"
    "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/memory.cpp"
    "${VIDEO_CORE}/shader/decode/texture.cpp"
    "${VIDEO_CORE}/shader/decode/other.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
    "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
    "${VIDEO_CORE}/shader/decode/shift.cpp"
    "${VIDEO_CORE}/shader/decode/video.cpp"
    "${VIDEO_CORE}/shader/decode/warp.cpp"
    "${VIDEO_CORE}/shader/decode/xmad.cpp"
    "${VIDEO_CORE}/shader/ast.cpp"
    "${VIDEO_CORE}/shader/ast.h"
    "${VIDEO_CORE}/shader/compiler_settings.cpp"
    "${VIDEO_CORE}/shader/compiler_settings.h"
    "${VIDEO_CORE}/shader/control_flow.cpp"
    "${VIDEO_CORE}/shader/control_flow.h"
    "${VIDEO_CORE}/shader/decode.cpp"
    "${VIDEO_CORE}/shader/expr.cpp"
    "${VIDEO_CORE}/shader/expr.h"
    "${VIDEO_CORE}/shader/node.h"
    "${VIDEO_CORE}/shader/node_helper.cpp"
    "${VIDEO_CORE}/shader/node_helper.h"
    "${VIDEO_CORE}/shader/registry.cpp"
    "${VIDEO_CORE}/shader/registry.h"
    "${VIDEO_CORE}/shader/shader_ir.cpp"
    "${VIDEO_CORE}/shader/shader_ir.h"
    "${VIDEO_CORE}/shader/track.cpp"
    "${VIDEO_CORE}/shader/transform_feedback.cpp"
    "${VIDEO_CORE}/shader/transform_feedback.h"
 )
 set(COMBINED "")
 foreach (F IN LISTS HASH_FILES)
--- a/externals/Vulkan-Headers
+++ b/externals/Vulkan-Headers
@ -1 +1 @@
-Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0
+Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6
--- a/externals/sirit
+++ b/externals/sirit
@ -1 +1 @@
-Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a
+Subproject commit a39596358a3a5488c06554c0c15184a6af71e433
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@ -32,61 +32,7 @@ add_custom_command(OUTPUT scm_rev.cpp
    DEPENDS
      # WARNING! It was too much work to try and make a common location for this list,
      # so if you need to change it, please update CMakeModules/GenerateSCMRev.cmake as well
-      "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.cpp"
+      # ...
      "${VIDEO_CORE}/renderer_opengl/gl_arb_decompiler.h"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.cpp"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_cache.h"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.cpp"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_decompiler.h"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.cpp"
      "${VIDEO_CORE}/renderer_opengl/gl_shader_disk_cache.h"
      "${VIDEO_CORE}/shader/decode/arithmetic.cpp"
      "${VIDEO_CORE}/shader/decode/arithmetic_half.cpp"
      "${VIDEO_CORE}/shader/decode/arithmetic_half_immediate.cpp"
      "${VIDEO_CORE}/shader/decode/arithmetic_immediate.cpp"
      "${VIDEO_CORE}/shader/decode/arithmetic_integer.cpp"
      "${VIDEO_CORE}/shader/decode/arithmetic_integer_immediate.cpp"
      "${VIDEO_CORE}/shader/decode/bfe.cpp"
      "${VIDEO_CORE}/shader/decode/bfi.cpp"
      "${VIDEO_CORE}/shader/decode/conversion.cpp"
      "${VIDEO_CORE}/shader/decode/ffma.cpp"
      "${VIDEO_CORE}/shader/decode/float_set.cpp"
      "${VIDEO_CORE}/shader/decode/float_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/half_set.cpp"
      "${VIDEO_CORE}/shader/decode/half_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/hfma2.cpp"
      "${VIDEO_CORE}/shader/decode/image.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set.cpp"
      "${VIDEO_CORE}/shader/decode/integer_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/memory.cpp"
      "${VIDEO_CORE}/shader/decode/texture.cpp"
      "${VIDEO_CORE}/shader/decode/other.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/predicate_set_register.cpp"
      "${VIDEO_CORE}/shader/decode/register_set_predicate.cpp"
      "${VIDEO_CORE}/shader/decode/shift.cpp"
      "${VIDEO_CORE}/shader/decode/video.cpp"
      "${VIDEO_CORE}/shader/decode/warp.cpp"
      "${VIDEO_CORE}/shader/decode/xmad.cpp"
      "${VIDEO_CORE}/shader/ast.cpp"
      "${VIDEO_CORE}/shader/ast.h"
      "${VIDEO_CORE}/shader/compiler_settings.cpp"
      "${VIDEO_CORE}/shader/compiler_settings.h"
      "${VIDEO_CORE}/shader/control_flow.cpp"
      "${VIDEO_CORE}/shader/control_flow.h"
      "${VIDEO_CORE}/shader/decode.cpp"
      "${VIDEO_CORE}/shader/expr.cpp"
      "${VIDEO_CORE}/shader/expr.h"
      "${VIDEO_CORE}/shader/node.h"
      "${VIDEO_CORE}/shader/node_helper.cpp"
      "${VIDEO_CORE}/shader/node_helper.h"
      "${VIDEO_CORE}/shader/registry.cpp"
      "${VIDEO_CORE}/shader/registry.h"
      "${VIDEO_CORE}/shader/shader_ir.cpp"
      "${VIDEO_CORE}/shader/shader_ir.h"
      "${VIDEO_CORE}/shader/track.cpp"
      "${VIDEO_CORE}/shader/transform_feedback.cpp"
      "${VIDEO_CORE}/shader/transform_feedback.h"
      # and also check that the scm_rev files haven't changed
      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.cpp.in"
      "${CMAKE_CURRENT_SOURCE_DIR}/scm_rev.h"
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@ -29,7 +29,6 @@ add_library(video_core STATIC
    dirty_flags.h
    dma_pusher.cpp
    dma_pusher.h
    engines/const_buffer_engine_interface.h
    engines/const_buffer_info.h
    engines/engine_interface.h
    engines/engine_upload.cpp
@ -61,8 +60,6 @@ add_library(video_core STATIC
    gpu.h
    gpu_thread.cpp
    gpu_thread.h
    guest_driver.cpp
    guest_driver.h
    memory_manager.cpp
    memory_manager.h
    query_cache.h
@ -71,8 +68,6 @@ add_library(video_core STATIC
    rasterizer_interface.h
    renderer_base.cpp
    renderer_base.h
    renderer_opengl/gl_arb_decompiler.cpp
    renderer_opengl/gl_arb_decompiler.h
    renderer_opengl/gl_buffer_cache.cpp
    renderer_opengl/gl_buffer_cache.h
    renderer_opengl/gl_device.cpp
@ -85,10 +80,6 @@ add_library(video_core STATIC
    renderer_opengl/gl_resource_manager.h
    renderer_opengl/gl_shader_cache.cpp
    renderer_opengl/gl_shader_cache.h
    renderer_opengl/gl_shader_decompiler.cpp
    renderer_opengl/gl_shader_decompiler.h
    renderer_opengl/gl_shader_disk_cache.cpp
    renderer_opengl/gl_shader_disk_cache.h
    renderer_opengl/gl_shader_manager.cpp
    renderer_opengl/gl_shader_manager.h
    renderer_opengl/gl_shader_util.cpp
@ -128,8 +119,6 @@ add_library(video_core STATIC
    renderer_vulkan/vk_descriptor_pool.h
    renderer_vulkan/vk_fence_manager.cpp
    renderer_vulkan/vk_fence_manager.h
    renderer_vulkan/vk_graphics_pipeline.cpp
    renderer_vulkan/vk_graphics_pipeline.h
    renderer_vulkan/vk_master_semaphore.cpp
    renderer_vulkan/vk_master_semaphore.h
    renderer_vulkan/vk_pipeline_cache.cpp
@ -142,8 +131,6 @@ add_library(video_core STATIC
    renderer_vulkan/vk_resource_pool.h
    renderer_vulkan/vk_scheduler.cpp
    renderer_vulkan/vk_scheduler.h
    renderer_vulkan/vk_shader_decompiler.cpp
    renderer_vulkan/vk_shader_decompiler.h
    renderer_vulkan/vk_shader_util.cpp
    renderer_vulkan/vk_shader_util.h
    renderer_vulkan/vk_staging_buffer_pool.cpp
@ -159,57 +146,6 @@ add_library(video_core STATIC
    shader_cache.h
    shader_notify.cpp
    shader_notify.h
    shader/decode/arithmetic.cpp
    shader/decode/arithmetic_immediate.cpp
    shader/decode/bfe.cpp
    shader/decode/bfi.cpp
    shader/decode/shift.cpp
    shader/decode/arithmetic_integer.cpp
    shader/decode/arithmetic_integer_immediate.cpp
    shader/decode/arithmetic_half.cpp
    shader/decode/arithmetic_half_immediate.cpp
    shader/decode/ffma.cpp
    shader/decode/hfma2.cpp
    shader/decode/conversion.cpp
    shader/decode/memory.cpp
    shader/decode/texture.cpp
    shader/decode/image.cpp
    shader/decode/float_set_predicate.cpp
    shader/decode/integer_set_predicate.cpp
    shader/decode/half_set_predicate.cpp
    shader/decode/predicate_set_register.cpp
    shader/decode/predicate_set_predicate.cpp
    shader/decode/register_set_predicate.cpp
    shader/decode/float_set.cpp
    shader/decode/integer_set.cpp
    shader/decode/half_set.cpp
    shader/decode/video.cpp
    shader/decode/warp.cpp
    shader/decode/xmad.cpp
    shader/decode/other.cpp
    shader/ast.cpp
    shader/ast.h
    shader/async_shaders.cpp
    shader/async_shaders.h
    shader/compiler_settings.cpp
    shader/compiler_settings.h
    shader/control_flow.cpp
    shader/control_flow.h
    shader/decode.cpp
    shader/expr.cpp
    shader/expr.h
    shader/memory_util.cpp
    shader/memory_util.h
    shader/node_helper.cpp
    shader/node_helper.h
    shader/node.h
    shader/registry.cpp
    shader/registry.h
    shader/shader_ir.cpp
    shader/shader_ir.h
    shader/track.cpp
    shader/transform_feedback.cpp
    shader/transform_feedback.h
    surface.cpp
    surface.h
    texture_cache/accelerated_swizzle.cpp
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@ -1,103 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <type_traits>
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/guest_driver.h"
 #include "video_core/textures/texture.h"
 namespace Tegra::Engines {
 /// Compact 32-bit description of a sampler: shader-side texture type, per-component types,
 /// array/buffer/shadow flags and the texture format, all packed into `raw`.
 struct SamplerDescriptor {
    union {
        u32 raw = 0;
        BitField<0, 2, Tegra::Shader::TextureType> texture_type;
        BitField<2, 3, Tegra::Texture::ComponentType> r_type;
        BitField<5, 1, u32> is_array;
        BitField<6, 1, u32> is_buffer;
        BitField<7, 1, u32> is_shadow;
        BitField<8, 3, Tegra::Texture::ComponentType> g_type;
        BitField<11, 3, Tegra::Texture::ComponentType> b_type;
        BitField<14, 3, Tegra::Texture::ComponentType> a_type;
        BitField<17, 7, Tegra::Texture::TextureFormat> format;
    };
    /// Two descriptors are equal when their packed words match.
    bool operator==(const SamplerDescriptor& rhs) const noexcept {
        return raw == rhs.raw;
    }
    bool operator!=(const SamplerDescriptor& rhs) const noexcept {
        return raw != rhs.raw;
    }
    /// Builds a descriptor from a TIC entry.
    /// Copies the format and component types, then maps the TIC texture type onto the
    /// shader-side texture type plus the is_array / is_buffer flags. is_shadow is left at zero.
    static SamplerDescriptor FromTIC(const Tegra::Texture::TICEntry& tic) {
        using ShaderTextureType = Tegra::Shader::TextureType;
        using TicTextureType = Tegra::Texture::TextureType;
        SamplerDescriptor descriptor;
        descriptor.format.Assign(tic.format.Value());
        descriptor.r_type.Assign(tic.r_type.Value());
        descriptor.g_type.Assign(tic.g_type.Value());
        descriptor.b_type.Assign(tic.b_type.Value());
        descriptor.a_type.Assign(tic.a_type.Value());
        switch (tic.texture_type.Value()) {
        case TicTextureType::Texture1D:
            descriptor.texture_type.Assign(ShaderTextureType::Texture1D);
            break;
        case TicTextureType::Texture1DArray:
            descriptor.texture_type.Assign(ShaderTextureType::Texture1D);
            descriptor.is_array.Assign(1);
            break;
        case TicTextureType::Texture1DBuffer:
            descriptor.texture_type.Assign(ShaderTextureType::Texture1D);
            descriptor.is_buffer.Assign(1);
            break;
        case TicTextureType::Texture2DArray:
            descriptor.texture_type.Assign(ShaderTextureType::Texture2D);
            descriptor.is_array.Assign(1);
            break;
        case TicTextureType::Texture3D:
            descriptor.texture_type.Assign(ShaderTextureType::Texture3D);
            break;
        case TicTextureType::TextureCubemap:
            descriptor.texture_type.Assign(ShaderTextureType::TextureCube);
            break;
        case TicTextureType::TextureCubeArray:
            descriptor.texture_type.Assign(ShaderTextureType::TextureCube);
            descriptor.is_array.Assign(1);
            break;
        case TicTextureType::Texture2D:
        case TicTextureType::Texture2DNoMipmap:
        default:
            // Plain 2D, 2D without mipmaps and any unrecognised type are all described as 2D.
            descriptor.texture_type.Assign(ShaderTextureType::Texture2D);
            break;
        }
        return descriptor;
    }
 };
 static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
 /// Abstract interface exposing const buffer reads, sampler descriptor lookups and guest driver
 /// profile access for an engine. Every operation is pure virtual.
 class ConstBufferEngineInterface {
 public:
    virtual ~ConstBufferEngineInterface() = default;
    /// Returns a 32-bit value read from const buffer `const_buffer` at `offset` for `stage`.
    virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
    /// Returns the descriptor of a bound sampler for `stage`.
    /// NOTE(review): `offset` looks like a handle index rather than a byte offset — confirm
    /// against the implementers.
    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
    /// Returns the descriptor of the sampler whose handle is stored in const buffer
    /// `const_buffer` at `offset` for `stage`.
    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                    u64 offset) const = 0;
    /// Returns the descriptor for the sampler identified by the raw `handle` word.
    virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
    /// Returns the index of the bound const buffer (presumably the one holding texture
    /// handles — TODO confirm).
    virtual u32 GetBoundBuffer() const = 0;
    /// Mutable access to the guest driver profile.
    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
    /// Read-only access to the guest driver profile.
    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
 };
 } // namespace Tegra::Engines
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@ -57,53 +57,11 @@ void KeplerCompute::CallMultiMethod(u32 method, const u32* base_start, u32 amoun
    }
 }
 /// Reads one 32-bit word from a compute const buffer.
 /// @param stage Must be ShaderType::Compute (asserted).
 /// @param const_buffer Index into the launch description's const buffer configuration.
 /// @param offset Offset added to the const buffer's base address.
 /// @return The 32-bit value stored at that GPU address.
 u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
    ASSERT(stage == ShaderType::Compute);
    const auto& buffer = launch_description.const_buffer_config[const_buffer];
    // Read through the memory manager's typed accessor, as AccessBindlessSampler does, instead
    // of copying from the raw GetPointer() result, which was never checked for null.
    return memory_manager.Read<u32>(buffer.Address() + offset);
 }
 /// Resolves a bound sampler by delegating to AccessBindlessSampler on the const buffer
 /// selected by regs.tex_cb_index, scaling `offset` by the size of a texture handle.
 SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
    const u64 handle_byte_offset = offset * sizeof(Texture::TextureHandle);
    return AccessBindlessSampler(stage, regs.tex_cb_index, handle_byte_offset);
 }
 /// Reads a 32-bit texture handle from the given compute const buffer at `offset` and resolves
 /// it into a sampler descriptor. `stage` must be ShaderType::Compute (asserted).
 SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                        u64 offset) const {
    ASSERT(stage == ShaderType::Compute);
    const GPUVAddr handle_address =
        launch_description.const_buffer_config[const_buffer].Address() + offset;
    const auto raw_handle = memory_manager.Read<u32>(handle_address);
    return AccessSampler(raw_handle);
 }
 /// Decodes `handle` into its TIC and TSC ids, builds the descriptor from the TIC entry and
 /// sets is_shadow from the TSC entry's depth-compare-enabled field.
 SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
    const Texture::TextureHandle decoded{handle};
    const auto image_entry = GetTICEntry(decoded.tic_id);
    const auto sampler_entry = GetTSCEntry(decoded.tsc_id);
    auto descriptor = SamplerDescriptor::FromTIC(image_entry);
    descriptor.is_shadow.Assign(sampler_entry.depth_compare_enabled.Value());
    return descriptor;
 }
 /// Returns the mutable guest driver profile, forwarded from the rasterizer.
 VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
    return rasterizer->AccessGuestDriverProfile();
 }
 /// Returns the read-only guest driver profile, forwarded from the rasterizer.
 const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
    return rasterizer->AccessGuestDriverProfile();
 }
 void KeplerCompute::ProcessLaunch() {
    const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
    memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
                                   LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32));
-
+    rasterizer->DispatchCompute();
    const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start;
    LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr);
    rasterizer->DispatchCompute(code_addr);
 }
 Texture::TICEntry KeplerCompute::GetTICEntry(u32 tic_index) const {
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@ -10,7 +10,6 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/engines/shader_type.h"
@ -40,7 +39,7 @@ namespace Tegra::Engines {
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
    (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
-class KeplerCompute final : public ConstBufferEngineInterface, public EngineInterface {
+class KeplerCompute final : public EngineInterface {
 public:
    explicit KeplerCompute(Core::System& system, MemoryManager& memory_manager);
    ~KeplerCompute();
@ -209,23 +208,6 @@ public:
    void CallMultiMethod(u32 method, const u32* base_start, u32 amount,
                         u32 methods_pending) override;
    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                            u64 offset) const override;
    SamplerDescriptor AccessSampler(u32 handle) const override;
    u32 GetBoundBuffer() const override {
        return regs.tex_cb_index;
    }
    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
 private:
    void ProcessLaunch();
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@ -670,42 +670,4 @@ void Maxwell3D::ProcessClearBuffers() {
    rasterizer->Clear();
 }
 /// Reads one 32-bit word from const buffer `const_buffer` of a graphics shader stage.
 /// `stage` must not be ShaderType::Compute (asserted); `offset` is added to the buffer address.
 u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto stage_index = static_cast<std::size_t>(stage);
    const auto& source_buffer = state.shader_stages[stage_index].const_buffers[const_buffer];
    const auto word_address = source_buffer.address + offset;
    return memory_manager.Read<u32>(word_address);
 }
 /// Resolves a bound sampler by delegating to AccessBindlessSampler on the const buffer
 /// selected by regs.tex_cb_index, scaling `offset` by the size of a texture handle.
 SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
    const u64 handle_byte_offset = offset * sizeof(Texture::TextureHandle);
    return AccessBindlessSampler(stage, regs.tex_cb_index, handle_byte_offset);
 }
 /// Reads a 32-bit texture handle from const buffer `const_buffer` of a graphics stage at
 /// `offset` and resolves it into a sampler descriptor. `stage` must not be Compute (asserted).
 SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                    u64 offset) const {
    ASSERT(stage != ShaderType::Compute);
    const auto stage_index = static_cast<std::size_t>(stage);
    const auto& handle_buffer = state.shader_stages[stage_index].const_buffers[const_buffer];
    const GPUVAddr handle_address = handle_buffer.address + offset;
    const auto raw_handle = memory_manager.Read<u32>(handle_address);
    return AccessSampler(raw_handle);
 }
 /// Decodes `handle` into its TIC and TSC ids, builds the descriptor from the TIC entry and
 /// sets is_shadow from the TSC entry's depth-compare-enabled field.
 SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
    const Texture::TextureHandle decoded{handle};
    const auto image_entry = GetTICEntry(decoded.tic_id);
    const auto sampler_entry = GetTSCEntry(decoded.tsc_id);
    auto descriptor = SamplerDescriptor::FromTIC(image_entry);
    descriptor.is_shadow.Assign(sampler_entry.depth_compare_enabled.Value());
    return descriptor;
 }
 /// Returns the mutable guest driver profile, forwarded from the rasterizer.
 VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
    return rasterizer->AccessGuestDriverProfile();
 }
 /// Returns the read-only guest driver profile, forwarded from the rasterizer.
 const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
    return rasterizer->AccessGuestDriverProfile();
 }
 } // namespace Tegra::Engines
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@ -17,7 +17,6 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/engine_interface.h"
 #include "video_core/engines/engine_upload.h"
@ -49,7 +48,7 @@ namespace Tegra::Engines {
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
-class Maxwell3D final : public ConstBufferEngineInterface, public EngineInterface {
+class Maxwell3D final : public EngineInterface {
 public:
    explicit Maxwell3D(Core::System& system, MemoryManager& memory_manager);
    ~Maxwell3D();
@ -1424,23 +1423,6 @@ public:
    void FlushMMEInlineDraw();
    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                            u64 offset) const override;
    SamplerDescriptor AccessSampler(u32 handle) const override;
    u32 GetBoundBuffer() const override {
        return regs.tex_cb_index;
    }
    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
    bool ShouldExecute() const {
        return execute_on;
    }
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@ -1,37 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <limits>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/guest_driver.h"
 namespace VideoCore {
 void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32> bound_offsets) {
    if (texture_handler_size) {
        return;
    }
    const std::size_t size = bound_offsets.size();
    if (size < 2) {
        return;
    }
    std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
    u32 min_val = std::numeric_limits<u32>::max();
    for (std::size_t i = 1; i < size; ++i) {
        if (bound_offsets[i] == bound_offsets[i - 1]) {
            continue;
        }
        const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
        min_val = std::min(min_val, new_min);
    }
    if (min_val > 2) {
        return;
    }
    texture_handler_size = min_texture_handler_size * min_val;
 }
 } // namespace VideoCore
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@ -1,46 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <optional>
 #include <vector>
 #include "common/common_types.h"
 namespace VideoCore {
 /**
 * The GuestDriverProfile class is used to learn about the GPU driver's behavior and to collect
 * information necessary for unavoidable HLE methods, such as shader tracking, as they are
 * Entscheidungsproblems (undecidable in general).
 */
 class GuestDriverProfile {
 public:
    explicit GuestDriverProfile() = default;
    explicit GuestDriverProfile(std::optional<u32> texture_handler_size_)
        : texture_handler_size{texture_handler_size_} {}
    void DeduceTextureHandlerSize(std::vector<u32> bound_offsets);
    u32 GetTextureHandlerSize() const {
        return texture_handler_size.value_or(default_texture_handler_size);
    }
    bool IsTextureHandlerSizeKnown() const {
        return texture_handler_size.has_value();
    }
 private:
    // Minimum size of texture handler any driver can use.
    static constexpr u32 min_texture_handler_size = 4;
    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily use 4 bytes instead.
    // Thus, certain drivers may squish the size.
    static constexpr u32 default_texture_handler_size = 8;
    std::optional<u32> texture_handler_size = default_texture_handler_size;
 };
 } // namespace VideoCore
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@ -11,7 +11,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
 #include "video_core/guest_driver.h"
 namespace Tegra {
 class MemoryManager;
@ -45,7 +44,7 @@ public:
    virtual void Clear() = 0;
    /// Dispatches a compute shader invocation
-    virtual void DispatchCompute(GPUVAddr code_addr) = 0;
+    virtual void DispatchCompute() = 0;
    /// Resets the counter of a query
    virtual void ResetCounter(QueryType type) = 0;
@ -136,18 +135,5 @@ public:
    /// Initialize disk cached resources for the game being emulated
    virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
                                   const DiskResourceLoadCallback& callback) {}
    /// Grants mutable access to the Guest Driver Profile for recording/obtaining info on the
    /// guest driver.
    /// @return Reference to the profile owned by this rasterizer interface.
    [[nodiscard]] GuestDriverProfile& AccessGuestDriverProfile() {
        return guest_driver_profile;
    }
    /// Grants read-only access to the Guest Driver Profile for obtaining info on the guest
    /// driver.
    /// @return Const reference to the profile owned by this rasterizer interface.
    [[nodiscard]] const GuestDriverProfile& AccessGuestDriverProfile() const {
        return guest_driver_profile;
    }
 private:
    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.cpp
--- a/src/video_core/renderer_opengl/gl_arb_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_arb_decompiler.h
@ -1,29 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <string>
 #include <string_view>
 #include "common/common_types.h"
 namespace Tegra::Engines {
 enum class ShaderType : u32;
 }
 namespace VideoCommon::Shader {
 class ShaderIR;
 class Registry;
 } // namespace VideoCommon::Shader
 namespace OpenGL {
 class Device;
 std::string DecompileAssemblyShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                                    const VideoCommon::Shader::Registry& registry,
                                    Tegra::Engines::ShaderType stage, std::string_view identifier);
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@ -54,40 +54,6 @@ namespace {
 constexpr size_t NUM_SUPPORTED_VERTEX_ATTRIBUTES = 16;
 /// Image and sampler indices decoded from a raw 32-bit texture handle.
 struct TextureHandle {
    /// @param data Raw handle word.
    /// @param via_header_index When true the sampler index mirrors the image (TIC) index;
    ///                         otherwise it is taken from the handle's TSC id.
    constexpr TextureHandle(u32 data, bool via_header_index) {
        const Tegra::Texture::TextureHandle unpacked{data};
        image = unpacked.tic_id;
        if (via_header_index) {
            sampler = image;
        } else {
            sampler = unpacked.tsc_id.Value();
        }
    }
    u32 image;
    u32 sampler;
 };
 /// Fetches the texture handle referenced by a shader entry from the engine's const buffers.
 /// - Separated sampler entries: two words are read and OR-ed together.
 /// - Bindless entries: the word at (entry.buffer, entry.offset) is used.
 /// - Otherwise: the word at index (entry.offset + index) of the engine's bound buffer is used.
 /// @param via_header_index Forwarded to TextureHandle to select how the sampler index is taken.
 template <typename Engine, typename Entry>
 TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
                             ShaderType shader_type, size_t index = 0) {
    const auto decode = [via_header_index](u32 raw_word) {
        return TextureHandle(raw_word, via_header_index);
    };
    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
        if (entry.is_separated) {
            const u32 first_buffer = entry.buffer;
            const u32 second_buffer = entry.secondary_buffer;
            const u32 first_offset = entry.offset;
            const u32 second_offset = entry.secondary_offset;
            const u32 first_word =
                engine.AccessConstBuffer32(shader_type, first_buffer, first_offset);
            const u32 second_word =
                engine.AccessConstBuffer32(shader_type, second_buffer, second_offset);
            return decode(first_word | second_word);
        }
    }
    if (entry.is_bindless) {
        return decode(engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset));
    }
    const u32 bound_buffer = engine.GetBoundBuffer();
    const u64 byte_offset = (entry.offset + index) * sizeof(u32);
    return decode(engine.AccessConstBuffer32(shader_type, bound_buffer, byte_offset));
 }
 /// Translates hardware transform feedback indices
 /// @param location Hardware location
 /// @return Pair of ARB_transform_feedback3 token stream first and third arguments
@ -119,44 +85,6 @@ std::pair<GLint, GLint> TransformFeedbackEnum(u8 location) {
 /// Enables the OpenGL capability `cap` when `state` is true, disables it otherwise.
 void oglEnable(GLenum cap, bool state) {
    if (state) {
        glEnable(cap);
    } else {
        glDisable(cap);
    }
 }
 /// Maps a sampler entry to the image view type it requires.
 /// Buffer entries always map to Buffer; otherwise the texture type selects the view, with the
 /// array variant chosen when entry.is_array is set (3D has no array variant here).
 ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
    using Tegra::Shader::TextureType;
    if (entry.is_buffer) {
        return ImageViewType::Buffer;
    }
    const auto array_or = [&entry](ImageViewType array_view, ImageViewType plain_view) {
        return entry.is_array ? array_view : plain_view;
    };
    switch (entry.type) {
    case TextureType::Texture1D:
        return array_or(ImageViewType::e1DArray, ImageViewType::e1D);
    case TextureType::Texture2D:
        return array_or(ImageViewType::e2DArray, ImageViewType::e2D);
    case TextureType::Texture3D:
        return ImageViewType::e3D;
    case TextureType::TextureCube:
        return array_or(ImageViewType::CubeArray, ImageViewType::Cube);
    }
    // Every handled type returns above; reaching this point is flagged as unreachable.
    UNREACHABLE();
    return ImageViewType::e2D;
 }
 /// Maps an image entry's type one-to-one onto the image view type it requires.
 ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
    using Tegra::Shader::ImageType;
    switch (entry.type) {
    case ImageType::Texture1D:
        return ImageViewType::e1D;
    case ImageType::Texture1DArray:
        return ImageViewType::e1DArray;
    case ImageType::Texture2D:
        return ImageViewType::e2D;
    case ImageType::Texture2DArray:
        return ImageViewType::e2DArray;
    case ImageType::Texture3D:
        return ImageViewType::e3D;
    case ImageType::TextureBuffer:
        return ImageViewType::Buffer;
    }
    // Every handled type returns above; reaching this point is flagged as unreachable.
    UNREACHABLE();
    return ImageViewType::e2D;
 }
 } // Anonymous namespace
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
@ -172,12 +100,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
      buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
      shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
      query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
-      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache),
+      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
      async_shaders(emu_window_) {
    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
 }
 RasterizerOpenGL::~RasterizerOpenGL() = default;
@ -244,117 +167,8 @@ void RasterizerOpenGL::SyncVertexInstances() {
    }
 }
 /// Selects the shader program for every enabled graphics stage and binds the resources
 /// (uniform/storage buffers, samplers, textures and images) those shaders declare.
 ///
 /// Works in two passes: the first pass walks the guest shader program slots, picks the host
 /// program for each one and records which buffers, samplers and image views it needs; the
 /// second pass, run after the caches have been updated, performs the actual host bindings.
 ///
 /// @param is_indexed Forwarded to the buffer cache when updating and binding geometry buffers.
 void RasterizerOpenGL::SetupShaders(bool is_indexed) {
    u32 clip_distances = 0;
    // One slot per stage; stays null for stages that end up without a shader.
    std::array<Shader*, Maxwell::MaxShaderStage> shaders{};
    // Reset the per-draw scratch lists filled by SetupDrawTextures/SetupDrawImages below.
    image_view_indices.clear();
    sampler_handles.clear();
    texture_cache.SynchronizeGraphicsDescriptors();
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto& shader_config = maxwell3d.regs.shader_config[index];
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
        // Skip stages that are not enabled
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
            // A disabled geometry or fragment stage is explicitly reset to program handle 0.
            switch (program) {
            case Maxwell::ShaderProgram::Geometry:
                program_manager.UseGeometryShader(0);
                break;
            case Maxwell::ShaderProgram::Fragment:
                program_manager.UseFragmentShader(0);
                break;
            default:
                break;
            }
            continue;
        }
        // Currently these stages are not supported in the OpenGL backend.
        // TODO(Blinkhawk): Port tessellation shaders from Vulkan to OpenGL
        if (program == Maxwell::ShaderProgram::TesselationControl ||
            program == Maxwell::ShaderProgram::TesselationEval) {
            continue;
        }
        Shader* const shader = shader_cache.GetStageProgram(program, async_shaders);
        // A shader that is not built yet is bound as handle 0.
        const GLuint program_handle = shader->IsBuilt() ? shader->GetHandle() : 0;
        switch (program) {
        case Maxwell::ShaderProgram::VertexA:
        case Maxwell::ShaderProgram::VertexB:
            program_manager.UseVertexShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Geometry:
            program_manager.UseGeometryShader(program_handle);
            break;
        case Maxwell::ShaderProgram::Fragment:
            program_manager.UseFragmentShader(program_handle);
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
                              shader_config.enable.Value(), shader_config.offset);
            break;
        }
        // Program indices 0 and 1 both map to stage 0; every later program maps to index - 1.
        const size_t stage = index == 0 ? 0 : index - 1;
        shaders[stage] = shader;
        // Queue this stage's sampler handles and image view indices for the second pass.
        SetupDrawTextures(shader, stage);
        SetupDrawImages(shader, stage);
        buffer_cache.SetEnabledUniformBuffers(stage, shader->GetEntries().enabled_uniform_buffers);
        // Rebind storage buffers from scratch, in the order the shader lists them.
        buffer_cache.UnbindGraphicsStorageBuffers(stage);
        u32 ssbo_index = 0;
        for (const auto& buffer : shader->GetEntries().global_memory_entries) {
            buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, buffer.cbuf_index,
                                                   buffer.cbuf_offset, buffer.is_written);
            ++ssbo_index;
        }
        // Workaround for Intel drivers.
        // When a clip distance is enabled but not set in the shader it crops parts of the screen
        // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
        // clip distances only when it's written by a shader stage.
        clip_distances |= shader->GetEntries().clip_distances;
        // When VertexA is enabled, we have dual vertex shaders
        if (program == Maxwell::ShaderProgram::VertexA) {
            // VertexB was combined with VertexA, so we skip the VertexB iteration
            ++index;
        }
    }
    SyncClipEnabled(clip_distances);
    // All stages were just refreshed, so clear the dirty flag for shaders.
    maxwell3d.dirty.flags[Dirty::Shaders] = false;
    buffer_cache.UpdateGraphicsBuffers(is_indexed);
    // Resolve the queued image view indices into image view ids in one batch.
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
    buffer_cache.BindHostGeometryBuffers(is_indexed);
    // Running cursors shared by all stages; BindTextures advances them by reference.
    size_t image_view_index = 0;
    size_t texture_index = 0;
    size_t image_index = 0;
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        const Shader* const shader = shaders[stage];
        if (!shader) {
            continue;
        }
        buffer_cache.BindHostStageBuffers(stage);
        const auto& base = device.GetBaseBindings(stage);
        BindTextures(shader->GetEntries(), base.sampler, base.image, image_view_index,
                     texture_index, image_index);
    }
 }
 void RasterizerOpenGL::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
-                                         const VideoCore::DiskResourceLoadCallback& callback) {
+                                         const VideoCore::DiskResourceLoadCallback& callback) {}
    shader_cache.LoadDiskCache(title_id, stop_loading, callback);
 }
 void RasterizerOpenGL::Clear() {
    MICROPROFILE_SCOPE(OpenGL_Clears);
@ -434,7 +248,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    // Setup shaders and their used resources.
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    SetupShaders(is_indexed);
    texture_cache.UpdateRenderTargets(false);
    state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
@ -488,27 +301,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
    gpu.TickWork();
 }
-void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
+void RasterizerOpenGL::DispatchCompute() {
-    Shader* const kernel = shader_cache.GetComputeKernel(code_addr);
+    UNREACHABLE_MSG("Not implemented");
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    BindComputeTextures(kernel);
    const auto& entries = kernel->GetEntries();
    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
    buffer_cache.UnbindComputeStorageBuffers();
    u32 ssbo_index = 0;
    for (const auto& buffer : entries.global_memory_entries) {
        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
                                              buffer.is_written);
        ++ssbo_index;
    }
    buffer_cache.UpdateComputeBuffers();
    buffer_cache.BindHostComputeBuffers();
    const auto& launch_desc = kepler_compute.launch_description;
    glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
    ++num_queued_commands;
 }
 void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@ -726,106 +520,6 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
    return true;
 }
 /// Gathers the samplers and images used by a compute kernel, binds the kernel program and
 /// then binds the resolved textures and images starting at binding 0.
 void RasterizerOpenGL::BindComputeTextures(Shader* kernel) {
    // Start from empty scratch lists; the Setup* helpers below append to them.
    sampler_handles.clear();
    image_view_indices.clear();

    texture_cache.SynchronizeComputeDescriptors();

    SetupComputeTextures(kernel);
    SetupComputeImages(kernel);

    // Resolve every queued image view index into an image view id.
    texture_cache.FillComputeImageViews(
        std::span(image_view_indices.data(), image_view_indices.size()), image_view_ids);

    program_manager.BindCompute(kernel->GetHandle());

    // Cursors advanced by BindTextures; compute has a single stage so they all start at zero.
    size_t view_cursor = 0;
    size_t texture_cursor = 0;
    size_t image_cursor = 0;
    BindTextures(kernel->GetEntries(), 0, 0, view_cursor, texture_cursor, image_cursor);
 }
 void RasterizerOpenGL::BindTextures(const ShaderEntries& entries, GLuint base_texture,
                                    GLuint base_image, size_t& image_view_index,
                                    size_t& texture_index, size_t& image_index) {
    const GLuint* const samplers = sampler_handles.data() + texture_index;
    const GLuint* const textures = texture_handles.data() + texture_index;
    const GLuint* const images = image_handles.data() + image_index;
    const size_t num_samplers = entries.samplers.size();
    for (const auto& sampler : entries.samplers) {
        for (size_t i = 0; i < sampler.size; ++i) {
            const ImageViewId image_view_id = image_view_ids[image_view_index++];
            const ImageView& image_view = texture_cache.GetImageView(image_view_id);
            const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(sampler));
            texture_handles[texture_index++] = handle;
        }
    }
    const size_t num_images = entries.images.size();
    for (size_t unit = 0; unit < num_images; ++unit) {
        // TODO: Mark as modified
        const ImageViewId image_view_id = image_view_ids[image_view_index++];
        const ImageView& image_view = texture_cache.GetImageView(image_view_id);
        const GLuint handle = image_view.Handle(ImageViewTypeFromEntry(entries.images[unit]));
        image_handles[image_index] = handle;
        ++image_index;
    }
    if (num_samplers > 0) {
        glBindSamplers(base_texture, static_cast<GLsizei>(num_samplers), samplers);
        glBindTextures(base_texture, static_cast<GLsizei>(num_samplers), textures);
    }
    if (num_images > 0) {
        glBindImageTextures(base_image, static_cast<GLsizei>(num_images), images);
    }
 }
 void RasterizerOpenGL::SetupDrawTextures(const Shader* shader, size_t stage_index) {
    const bool via_header_index =
        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    for (const auto& entry : shader->GetEntries().samplers) {
        const auto shader_type = static_cast<ShaderType>(stage_index);
        for (size_t index = 0; index < entry.size; ++index) {
            const auto handle =
                GetTextureInfo(maxwell3d, via_header_index, entry, shader_type, index);
            const Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
            sampler_handles.push_back(sampler->Handle());
            image_view_indices.push_back(handle.image);
        }
    }
 }
 void RasterizerOpenGL::SetupComputeTextures(const Shader* kernel) {
    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
    for (const auto& entry : kernel->GetEntries().samplers) {
        for (size_t i = 0; i < entry.size; ++i) {
            const auto handle =
                GetTextureInfo(kepler_compute, via_header_index, entry, ShaderType::Compute, i);
            const Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
            sampler_handles.push_back(sampler->Handle());
            image_view_indices.push_back(handle.image);
        }
    }
 }
 /// Queues the image view index of every image used by a graphics shader stage.
 void RasterizerOpenGL::SetupDrawImages(const Shader* shader, size_t stage_index) {
    const bool use_header_index =
        maxwell3d.regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    // The stage does not change while iterating, so convert it once up front.
    const auto stage_type = static_cast<ShaderType>(stage_index);

    for (const auto& image_entry : shader->GetEntries().images) {
        const auto info = GetTextureInfo(maxwell3d, use_header_index, image_entry, stage_type);
        image_view_indices.push_back(info.image);
    }
 }
 /// Queues the image view index of every image used by a compute kernel.
 void RasterizerOpenGL::SetupComputeImages(const Shader* shader) {
    const bool use_header_index = kepler_compute.launch_description.linked_tsc;

    for (const auto& image_entry : shader->GetEntries().images) {
        const auto info =
            GetTextureInfo(kepler_compute, use_header_index, image_entry, ShaderType::Compute);
        image_view_indices.push_back(info.image);
    }
 }
 void RasterizerOpenGL::SyncState() {
    SyncViewport();
    SyncRasterizeEnable();
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@ -28,11 +28,9 @@
 #include "video_core/renderer_opengl/gl_query_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_manager.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/gl_texture_cache.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/textures/texture.h"
 namespace Core::Memory {
@ -81,7 +79,7 @@ public:
    void Draw(bool is_indexed, bool is_instanced) override;
    void Clear() override;
-    void DispatchCompute(GPUVAddr code_addr) override;
+    void DispatchCompute() override;
    void ResetCounter(VideoCore::QueryType type) override;
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@ -118,36 +116,11 @@ public:
        return num_queued_commands > 0;
    }
    /// Returns a mutable reference to this rasterizer's async_shaders member.
    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
        return async_shaders;
    }
    /// Returns a read-only reference to this rasterizer's async_shaders member.
    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
        return async_shaders;
    }
 private:
    static constexpr size_t MAX_TEXTURES = 192;
    static constexpr size_t MAX_IMAGES = 48;
    static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;
    void BindComputeTextures(Shader* kernel);
    void BindTextures(const ShaderEntries& entries, GLuint base_texture, GLuint base_image,
                      size_t& image_view_index, size_t& texture_index, size_t& image_index);
    /// Configures the current textures to use for the draw command.
    void SetupDrawTextures(const Shader* shader, size_t stage_index);
    /// Configures the textures used in a compute shader.
    void SetupComputeTextures(const Shader* kernel);
    /// Configures images in a graphics shader.
    void SetupDrawImages(const Shader* shader, size_t stage_index);
    /// Configures images in a compute shader.
    void SetupComputeImages(const Shader* shader);
    /// Syncs state to match guest's
    void SyncState();
@ -230,8 +203,6 @@ private:
    /// End a transform feedback
    void EndTransformFeedback();
    void SetupShaders(bool is_indexed);
    Tegra::GPU& gpu;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
@ -251,8 +222,6 @@ private:
    AccelerateDMA accelerate_dma;
    FenceManagerOpenGL fence_manager;
    VideoCommon::Shader::AsyncShaders async_shaders;
    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
    std::array<ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
    boost::container::static_vector<GLuint, MAX_TEXTURES> sampler_handles;
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@ -20,307 +20,19 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_arb_decompiler.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_notify.h"
 namespace OpenGL {
-using Tegra::Engines::ShaderType;
+Shader::Shader() = default;
 using VideoCommon::Shader::GetShaderAddress;
 using VideoCommon::Shader::GetShaderCode;
 using VideoCommon::Shader::GetUniqueIdentifier;
 using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
 using VideoCommon::Shader::STAGE_MAIN_OFFSET;
 namespace {
 constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
 /// Maps a shader stage to the matching OpenGL shader object type.
 /// Stages without a mapping here (e.g. tessellation) yield GL_NONE.
 constexpr GLenum GetGLShaderType(ShaderType shader_type) {
    if (shader_type == ShaderType::Vertex) {
        return GL_VERTEX_SHADER;
    }
    if (shader_type == ShaderType::Geometry) {
        return GL_GEOMETRY_SHADER;
    }
    if (shader_type == ShaderType::Fragment) {
        return GL_FRAGMENT_SHADER;
    }
    if (shader_type == ShaderType::Compute) {
        return GL_COMPUTE_SHADER;
    }
    return GL_NONE;
 }
 /// Returns the two-letter tag for a shader stage, or "UNK" for a value outside the enum.
 constexpr const char* GetShaderTypeName(ShaderType shader_type) {
    const char* tag = "UNK";
    switch (shader_type) {
    case ShaderType::Vertex:
        tag = "VS";
        break;
    case ShaderType::TesselationControl:
        tag = "HS";
        break;
    case ShaderType::TesselationEval:
        tag = "DS";
        break;
    case ShaderType::Geometry:
        tag = "GS";
        break;
    case ShaderType::Fragment:
        tag = "FS";
        break;
    case ShaderType::Compute:
        tag = "CS";
        break;
    }
    return tag;
 }
 /// Converts a Maxwell shader program slot into its shader stage. Both vertex programs share
 /// the vertex stage; an unlisted value yields a value-initialized ShaderType.
 constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
    ShaderType stage{};
    switch (program_type) {
    case Maxwell::ShaderProgram::VertexA:
    case Maxwell::ShaderProgram::VertexB:
        stage = ShaderType::Vertex;
        break;
    case Maxwell::ShaderProgram::TesselationControl:
        stage = ShaderType::TesselationControl;
        break;
    case Maxwell::ShaderProgram::TesselationEval:
        stage = ShaderType::TesselationEval;
        break;
    case Maxwell::ShaderProgram::Geometry:
        stage = ShaderType::Geometry;
        break;
    case Maxwell::ShaderProgram::Fragment:
        stage = ShaderType::Fragment;
        break;
    }
    return stage;
 }
 /// Maps a shader stage to the NV assembly program target enum; an unlisted value yields a
 /// value-initialized GLenum.
 constexpr GLenum AssemblyEnum(ShaderType shader_type) {
    GLenum target{};
    switch (shader_type) {
    case ShaderType::Vertex:
        target = GL_VERTEX_PROGRAM_NV;
        break;
    case ShaderType::TesselationControl:
        target = GL_TESS_CONTROL_PROGRAM_NV;
        break;
    case ShaderType::TesselationEval:
        target = GL_TESS_EVALUATION_PROGRAM_NV;
        break;
    case ShaderType::Geometry:
        target = GL_GEOMETRY_PROGRAM_NV;
        break;
    case ShaderType::Fragment:
        target = GL_FRAGMENT_PROGRAM_NV;
        break;
    case ShaderType::Compute:
        target = GL_COMPUTE_PROGRAM_NV;
        break;
    }
    return target;
 }
 /// Builds a shader id string: the stage tag followed by the unique identifier formatted as
 /// 16 upper-case hexadecimal digits.
 std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
    const char* const stage_tag = GetShaderTypeName(shader_type);
    return fmt::format("{}{:016X}", stage_tag, unique_identifier);
 }
 std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
    const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
    const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
                                                           entry.graphics_info, entry.compute_info};
    auto registry = std::make_shared<Registry>(entry.type, info);
    for (const auto& [address, value] : entry.keys) {
        const auto [buffer, offset] = address;
        registry->InsertKey(buffer, offset, value);
    }
    for (const auto& [offset, sampler] : entry.bound_samplers) {
        registry->InsertBoundSampler(offset, sampler);
    }
    for (const auto& [key, sampler] : entry.bindless_samplers) {
        const auto [buffer, offset] = key;
        registry->InsertBindlessSampler(buffer, offset, sampler);
    }
    return registry;
 }
 std::unordered_set<GLenum> GetSupportedFormats() {
    GLint num_formats;
    glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
    std::vector<GLint> formats(num_formats);
    glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
    std::unordered_set<GLenum> supported_formats;
    for (const GLint format : formats) {
        supported_formats.insert(static_cast<GLenum>(format));
    }
    return supported_formats;
 }
 } // Anonymous namespace
 /// Decompiles a shader IR and creates the host program for it, either as an assembly (ARB/NV)
 /// program or as a GLSL program, depending on device.UseAssemblyShaders().
 ///
 /// @param device            Host device; selects assembly vs GLSL and driver cache usage.
 /// @param shader_type       Stage of the shader being built.
 /// @param unique_identifier Identifier used to build the shader id string.
 /// @param ir                Shader IR to decompile.
 /// @param registry          Registry passed through to the decompiler.
 /// @param hint_retrievable  Forwarded to the GLSL program creation; forced to false when the
 ///                          driver cache is in use.
 /// @return A program handle object with either assembly_program or source_program populated.
 ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
                             const ShaderIR& ir, const Registry& registry, bool hint_retrievable) {
    if (device.UseDriverCache()) {
        // Ignore hint retrievable if we are using the driver cache
        hint_retrievable = false;
    }
    const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
    LOG_INFO(Render_OpenGL, "{}", shader_id);
    auto program = std::make_shared<ProgramHandle>();
    if (device.UseAssemblyShaders()) {
        const std::string arb =
            DecompileAssemblyShader(device, ir, registry, shader_type, shader_id);
        GLuint& arb_prog = program->assembly_program.handle;
 // Commented out functions signal OpenGL errors but are compatible with apitrace.
 // Use them only to capture and replay on apitrace.
 #if 0
        glGenProgramsNV(1, &arb_prog);
        glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
                        reinterpret_cast<const GLubyte*>(arb.data()));
 #else
        glGenProgramsARB(1, &arb_prog);
        glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
                                static_cast<GLsizei>(arb.size()), arb.data());
 #endif
        // A non-empty error string means the assembly was rejected; log it with the source.
        const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
        if (err && *err) {
            LOG_CRITICAL(Render_OpenGL, "{}", err);
            LOG_INFO(Render_OpenGL, "\n{}", arb);
        }
    } else {
        const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
        OGLShader shader;
        shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
        // NOTE(review): the leading `true` presumably requests a separable program - confirm
        // against OGLProgram::Create.
        program->source_program.Create(true, hint_retrievable, shader.handle);
    }
    return program;
 }
 /// Takes ownership of the registry, entries and program, then caches the host handle:
 /// the assembly program handle when it is non-zero, otherwise the source program handle.
 Shader::Shader(std::shared_ptr<Registry> registry_, ShaderEntries entries_,
                ProgramSharedPtr program_, bool is_built_)
    : registry{std::move(registry_)}, entries{std::move(entries_)}, program{std::move(program_)},
      is_built{is_built_} {
    const auto assembly_handle = program->assembly_program.handle;
    handle = assembly_handle != 0 ? assembly_handle : program->source_program.handle;
    // A shader flagged as built must already have a valid handle.
    if (is_built) {
        ASSERT(handle != 0);
    }
 }
 Shader::~Shader() = default;
 /// Returns the cached host program handle. Checks registry consistency through DEBUG_ASSERT
 /// before returning.
 GLuint Shader::GetHandle() const {
    DEBUG_ASSERT(registry->IsConsistent());
    return handle;
 }
 /// Returns whether the shader has been flagged as built (set at construction or by one of the
 /// Async*Built callbacks).
 bool Shader::IsBuilt() const {
    return is_built;
 }
 /// Installs a GLSL program as this shader's source program, refreshes the cached handle from
 /// it and flags the shader as built.
 void Shader::AsyncOpenGLBuilt(OGLProgram new_program) {
    program->source_program = std::move(new_program);
    handle = program->source_program.handle;
    is_built = true;
 }
 /// Installs an assembly program as this shader's assembly program, refreshes the cached handle
 /// from it and flags the shader as built.
 void Shader::AsyncGLASMBuilt(OGLAssemblyProgram new_program) {
    program->assembly_program = std::move(new_program);
    handle = program->assembly_program.handle;
    is_built = true;
 }
 /// Creates a graphics stage shader from guest program code.
 ///
 /// When asynchronous building does not apply, the shader is built immediately, saved to the
 /// disk cache and returned as built. Otherwise only the entries are computed here, the build is
 /// queued on async_shaders and the returned shader is flagged as not built.
 ///
 /// @param params        Device, GPU, disk cache and unique identifier for this shader.
 /// @param program_type  Maxwell program slot the code belongs to.
 /// @param code          Main program code.
 /// @param code_b        Secondary program code; stored/forwarded but not decompiled here.
 /// @param async_shaders Async build queue.
 /// @param cpu_addr      Forwarded to the async queue.
 std::unique_ptr<Shader> Shader::CreateStageFromMemory(
    const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code,
    ProgramCode code_b, VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr) {
    const auto shader_type = GetShaderType(program_type);
    auto& gpu = params.gpu;
    gpu.ShaderNotify().MarkSharderBuilding();
    auto registry = std::make_shared<Registry>(shader_type, gpu.Maxwell3D());
    // Build synchronously unless both the async queue and the device opt in to async builds.
    if (!async_shaders.IsShaderAsync(gpu) || !params.device.UseAsynchronousShaders()) {
        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
        // TODO(Rodrigo): Handle VertexA shaders
        // std::optional<ShaderIR> ir_b;
        // if (!code_b.empty()) {
        //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
        // }
        auto program =
            BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
        // Record everything needed to rebuild this shader from the disk cache.
        ShaderDiskCacheEntry entry;
        entry.type = shader_type;
        // NOTE(review): `ir` was constructed from `code`, which is moved from here while `ir` is
        // still used by MakeEntries below - confirm ShaderIR does not read the code afterwards.
        entry.code = std::move(code);
        entry.code_b = std::move(code_b);
        entry.unique_identifier = params.unique_identifier;
        entry.bound_buffer = registry->GetBoundBuffer();
        entry.graphics_info = registry->GetGraphicsInfo();
        entry.keys = registry->GetKeys();
        entry.bound_samplers = registry->GetBoundSamplers();
        entry.bindless_samplers = registry->GetBindlessSamplers();
        params.disk_cache.SaveEntry(std::move(entry));
        gpu.ShaderNotify().MarkShaderComplete();
        return std::unique_ptr<Shader>(new Shader(std::move(registry),
                                                  MakeEntries(params.device, ir, shader_type),
                                                  std::move(program), true));
    } else {
        // Required for entries
        const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
        auto entries = MakeEntries(params.device, ir, shader_type);
        // NOTE(review): MarkShaderComplete is not called on this path; presumably the async
        // worker reports completion - confirm.
        async_shaders.QueueOpenGLShader(params.device, shader_type, params.unique_identifier,
                                        std::move(code), std::move(code_b), STAGE_MAIN_OFFSET,
                                        COMPILER_SETTINGS, *registry, cpu_addr);
        // Empty program handle; it is filled in later through the Async*Built callbacks.
        auto program = std::make_shared<ProgramHandle>();
        return std::unique_ptr<Shader>(
            new Shader(std::move(registry), std::move(entries), std::move(program), false));
    }
 }
 /// Creates a compute kernel shader from guest program code: builds it immediately, saves the
 /// information needed to rebuild it to the disk cache and returns the resulting shader.
 ///
 /// @param params Device, GPU, engine, disk cache and unique identifier for this kernel.
 /// @param code   Kernel program code.
 std::unique_ptr<Shader> Shader::CreateKernelFromMemory(const ShaderParameters& params,
                                                        ProgramCode code) {
    auto& gpu = params.gpu;
    gpu.ShaderNotify().MarkSharderBuilding();
    auto registry = std::make_shared<Registry>(ShaderType::Compute, params.engine);
    const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
    const u64 uid = params.unique_identifier;
    auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
    // Record everything needed to rebuild this kernel from the disk cache.
    ShaderDiskCacheEntry entry;
    entry.type = ShaderType::Compute;
    // NOTE(review): `ir` was constructed from `code`, which is moved from here while `ir` is
    // still used by MakeEntries below - confirm ShaderIR does not read the code afterwards.
    entry.code = std::move(code);
    entry.unique_identifier = uid;
    entry.bound_buffer = registry->GetBoundBuffer();
    entry.compute_info = registry->GetComputeInfo();
    entry.keys = registry->GetKeys();
    entry.bound_samplers = registry->GetBoundSamplers();
    entry.bindless_samplers = registry->GetBindlessSamplers();
    params.disk_cache.SaveEntry(std::move(entry));
    gpu.ShaderNotify().MarkShaderComplete();
    return std::unique_ptr<Shader>(new Shader(std::move(registry),
                                              MakeEntries(params.device, ir, ShaderType::Compute),
                                              std::move(program)));
 }
 /// Wraps an already precompiled shader (registry, entries and program) in a Shader object.
 /// `params` is not used by this function.
 std::unique_ptr<Shader> Shader::CreateFromCache(const ShaderParameters& params,
                                                 const PrecompiledShader& precompiled_shader) {
    return std::unique_ptr<Shader>(new Shader(
        precompiled_shader.registry, precompiled_shader.entries, precompiled_shader.program));
 }
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
                                     Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
                                     Tegra::Engines::Maxwell3D& maxwell3d_,
@ -331,278 +43,4 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
 ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
 /// Loads the disk shader cache of a title and fills runtime_cache with the built shaders.
 ///
 /// Transferable entries are split into contiguous buckets, one per worker thread; each worker
 /// either restores the program from the precompiled (driver binary) cache or compiles it.
 /// Afterwards, shaders that had no precompiled binary are dumped and the precompiled file is
 /// written back, unless assembly shaders or the driver cache are in use.
 ///
 /// @param title_id     Title whose cache files are bound and loaded
 /// @param stop_loading Polled by the workers and after joining to abort loading early
 /// @param callback     Optional progress callback, invoked with the Build stage
 void ShaderCacheOpenGL::LoadDiskCache(u64 title_id, std::stop_token stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
    disk_cache.BindTitleID(title_id);
    const std::optional transferable = disk_cache.LoadTransferable();
    LOG_INFO(Render_OpenGL, "Total Shader Count: {}",
             transferable.has_value() ? transferable->size() : 0);
    if (!transferable) {
        return;
    }
    std::vector<ShaderDiskCachePrecompiled> gl_cache;
    if (!device.UseAssemblyShaders() && !device.UseDriverCache()) {
        // Only load the precompiled cache when we are using neither assembly shaders nor the
        // driver cache
        gl_cache = disk_cache.LoadPrecompiled();
    }
    const auto supported_formats = GetSupportedFormats();
    // Track if precompiled cache was altered during loading to know if we have to
    // serialize the virtual precompiled cache file back to the hard drive
    bool precompiled_cache_altered = false;
    // Inform the frontend about shader build initialization
    if (callback) {
        callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
    }
    std::mutex mutex;
    std::size_t built_shaders = 0; // It doesn't have to be atomic since it's used behind a mutex
    std::atomic_bool gl_cache_failed = false;
    // Looks up the precompiled binary that matches a transferable entry's unique identifier
    const auto find_precompiled = [&gl_cache](u64 id) {
        return std::ranges::find(gl_cache, id, &ShaderDiskCachePrecompiled::unique_identifier);
    };
    // Builds the transferable entries in [begin, end) using the given shared graphics context
    const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
                            std::size_t end) {
        const auto scope = context->Acquire();
        for (std::size_t i = begin; i < end; ++i) {
            if (stop_loading.stop_requested()) {
                return;
            }
            const auto& entry = (*transferable)[i];
            const u64 uid = entry.unique_identifier;
            const auto it = find_precompiled(uid);
            const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
            const bool is_compute = entry.type == ShaderType::Compute;
            const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
            auto registry = MakeRegistry(entry);
            const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
            ProgramSharedPtr program;
            if (precompiled_entry) {
                // If the shader is precompiled, attempt to load it from its dumped binary
                program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
                if (!program) {
                    gl_cache_failed = true;
                }
            }
            if (!program) {
                // Otherwise compile it from GLSL
                program = BuildShader(device, entry.type, uid, ir, *registry, true);
            }
            PrecompiledShader shader;
            shader.program = std::move(program);
            shader.registry = std::move(registry);
            shader.entries = MakeEntries(device, ir, entry.type);
            // Progress reporting and the runtime cache are shared between workers
            std::scoped_lock lock{mutex};
            if (callback) {
                callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
                         transferable->size());
            }
            runtime_cache.emplace(entry.unique_identifier, std::move(shader));
        }
    };
    const std::size_t num_workers{std::max(1U, std::thread::hardware_concurrency())};
    const std::size_t bucket_size{transferable->size() / num_workers};
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
    std::vector<std::thread> threads(num_workers);
    for (std::size_t i = 0; i < num_workers; ++i) {
        // The last worker also takes the remainder left over by the integer division
        const bool is_last_worker = i + 1 == num_workers;
        const std::size_t start{bucket_size * i};
        const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
        // On some platforms the shared context has to be created from the GUI thread
        contexts[i] = emu_window.CreateSharedContext();
        threads[i] = std::thread(worker, contexts[i].get(), start, end);
    }
    for (auto& thread : threads) {
        thread.join();
    }
    if (gl_cache_failed) {
        // Invalidate the precompiled cache if a dumped shader was rejected
        disk_cache.InvalidatePrecompiled();
        // NOTE(review): this store is never read because of the return right below
        precompiled_cache_altered = true;
        return;
    }
    if (stop_loading.stop_requested()) {
        return;
    }
    if (device.UseAssemblyShaders() || device.UseDriverCache()) {
        // Don't store precompiled binaries for assembly shaders or when using the driver cache
        return;
    }
    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
    // before precompiling them
    // Dump every shader that was compiled in this run (i.e. had no precompiled binary)
    for (std::size_t i = 0; i < transferable->size(); ++i) {
        const u64 id = (*transferable)[i].unique_identifier;
        const auto it = find_precompiled(id);
        if (it == gl_cache.end()) {
            const GLuint program = runtime_cache.at(id).program->source_program.handle;
            disk_cache.SavePrecompiled(id, program);
            precompiled_cache_altered = true;
        }
    }
    if (precompiled_cache_altered) {
        disk_cache.SaveVirtualPrecompiledFile();
    }
 }
 /// Tries to restore a program from a dumped driver binary.
 /// Returns a null pointer when the binary format is not supported or the driver rejects it.
 ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
    const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
    const std::unordered_set<GLenum>& supported_formats) {
    const GLenum format = precompiled_entry.binary_format;
    if (!supported_formats.contains(format)) {
        LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
        return {};
    }

    const auto& binary = precompiled_entry.binary;
    auto restored = std::make_shared<ProgramHandle>();
    // Store the new name in the handle wrapper right away, before it is used
    restored->source_program.handle = glCreateProgram();
    const GLuint id = restored->source_program.handle;
    glProgramParameteri(id, GL_PROGRAM_SEPARABLE, GL_TRUE);
    glProgramBinary(id, format, binary.data(), static_cast<GLsizei>(binary.size()));

    GLint link_status;
    glGetProgramiv(id, GL_LINK_STATUS, &link_status);
    if (link_status != GL_FALSE) {
        return restored;
    }
    LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
    return {};
 }
 /// Returns the shader for the given graphics stage, building it when it is not cached yet.
 ///
 /// Completed asynchronous builds are collected first and stored in the transferable disk cache.
 /// The shader is then looked up by the CPU address of its code; on a miss it is created either
 /// from the runtime cache (matching unique identifier) or from guest memory.
 ///
 /// @param program       Stage to look up
 /// @param async_shaders Asynchronous shader builder whose finished work is consumed here
 /// @return Non-owning pointer to the shader; also remembered in last_shaders
 Shader* ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program,
                                            VideoCommon::Shader::AsyncShaders& async_shaders) {
    if (!maxwell3d.dirty.flags[Dirty::Shaders]) {
        // Shader state is clean: reuse the previous result if there is one and it is built.
        // last_shaders starts out filled with null pointers, hence the null check.
        auto* last_shader = last_shaders[static_cast<std::size_t>(program)];
        if (last_shader != nullptr && last_shader->IsBuilt()) {
            return last_shader;
        }
    }
    const GPUVAddr address{GetShaderAddress(maxwell3d, program)};
    if (device.UseAsynchronousShaders() && async_shaders.HasCompletedWork()) {
        auto completed_work = async_shaders.GetCompletedWork();
        for (auto& work : completed_work) {
            Shader* shader = TryGet(work.cpu_address);
            // The build is reported as complete even when its shader is no longer registered
            gpu.ShaderNotify().MarkShaderComplete();
            if (shader == nullptr) {
                continue;
            }
            using namespace VideoCommon::Shader;
            if (work.backend == AsyncShaders::Backend::OpenGL) {
                shader->AsyncOpenGLBuilt(std::move(work.program.opengl));
            } else if (work.backend == AsyncShaders::Backend::GLASM) {
                shader->AsyncGLASMBuilt(std::move(work.program.glasm));
            }
            auto& registry = shader->GetRegistry();
            ShaderDiskCacheEntry entry;
            entry.type = work.shader_type;
            entry.code = std::move(work.code);
            entry.code_b = std::move(work.code_b);
            entry.unique_identifier = work.uid;
            entry.bound_buffer = registry.GetBoundBuffer();
            entry.graphics_info = registry.GetGraphicsInfo();
            entry.keys = registry.GetKeys();
            entry.bound_samplers = registry.GetBoundSamplers();
            entry.bindless_samplers = registry.GetBindlessSamplers();
            disk_cache.SaveEntry(std::move(entry));
        }
    }
    // Look up shader in the cache based on address
    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(address)};
    if (Shader* const shader{cpu_addr ? TryGet(*cpu_addr) : null_shader.get()}) {
        return last_shaders[static_cast<std::size_t>(program)] = shader;
    }
    const u8* const host_ptr{gpu_memory.GetPointer(address)};
    // No shader found - create a new one
    ProgramCode code{GetShaderCode(gpu_memory, address, host_ptr, false)};
    ProgramCode code_b;
    if (program == Maxwell::ShaderProgram::VertexA) {
        // VertexA is always paired with the code of VertexB
        const GPUVAddr address_b{GetShaderAddress(maxwell3d, Maxwell::ShaderProgram::VertexB)};
        const u8* host_ptr_b = gpu_memory.GetPointer(address_b);
        code_b = GetShaderCode(gpu_memory, address_b, host_ptr_b, false);
    }
    const std::size_t code_size = code.size() * sizeof(u64);
    const u64 unique_identifier = GetUniqueIdentifier(
        GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
    // cpu_addr may be empty on this path (unmapped address with no null shader yet), so it must
    // not be dereferenced unconditionally; fall back to 0 like the call below does.
    const ShaderParameters params{gpu,       maxwell3d,
                                  disk_cache, device,
                                  cpu_addr.value_or(0), host_ptr,
                                  unique_identifier};
    std::unique_ptr<Shader> shader;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
        shader = Shader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b),
                                               async_shaders, cpu_addr.value_or(0));
    } else {
        shader = Shader::CreateFromCache(params, found->second);
    }
    Shader* const result = shader.get();
    if (cpu_addr) {
        Register(std::move(shader), *cpu_addr, code_size);
    } else {
        // Shaders without a valid CPU address are owned by the cache itself
        null_shader = std::move(shader);
    }
    return last_shaders[static_cast<std::size_t>(program)] = result;
 }
 /// Returns the compute kernel located at the given GPU address, building it on a cache miss.
 ///
 /// On a miss the kernel is created from the runtime cache when an entry with the same unique
 /// identifier exists, otherwise it is compiled from guest memory.
 ///
 /// @param code_addr GPU virtual address of the kernel code
 /// @return Non-owning pointer to the kernel
 Shader* ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
    const std::optional<VAddr> cpu_addr{gpu_memory.GpuToCpuAddress(code_addr)};
    if (Shader* const kernel = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get()) {
        return kernel;
    }
    // No kernel found, create a new one
    const u8* host_ptr{gpu_memory.GetPointer(code_addr)};
    ProgramCode code{GetShaderCode(gpu_memory, code_addr, host_ptr, true)};
    const std::size_t code_size{code.size() * sizeof(u64)};
    const u64 unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
    // cpu_addr may be empty on this path (unmapped address with no null kernel yet), so it must
    // not be dereferenced unconditionally; an empty address is passed on as 0.
    const ShaderParameters params{gpu,        kepler_compute,
                                  disk_cache, device,
                                  cpu_addr.value_or(0), host_ptr,
                                  unique_identifier};
    std::unique_ptr<Shader> kernel;
    const auto found = runtime_cache.find(unique_identifier);
    if (found == runtime_cache.end()) {
        kernel = Shader::CreateKernelFromMemory(params, std::move(code));
    } else {
        kernel = Shader::CreateFromCache(params, found->second);
    }
    Shader* const result = kernel.get();
    if (cpu_addr) {
        Register(std::move(kernel), *cpu_addr, code_size);
    } else {
        // Kernels without a valid CPU address are owned by the cache itself
        null_kernel = std::move(kernel);
    }
    return result;
 }
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@ -19,10 +19,6 @@
 #include "common/common_types.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
 namespace Tegra {
@ -33,10 +29,6 @@ namespace Core::Frontend {
 class EmuWindow;
 }
 namespace VideoCommon::Shader {
 class AsyncShaders;
 }
 namespace OpenGL {
 class Device;
@ -44,77 +36,10 @@ class RasterizerOpenGL;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-struct ProgramHandle {
+class Shader {
    OGLProgram source_program;
    OGLAssemblyProgram assembly_program;
 };
 using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
 /// A shader that has already been built, kept in ShaderCacheOpenGL::runtime_cache and turned
 /// into a Shader by Shader::CreateFromCache.
 struct PrecompiledShader {
    /// Shared handle to the built program
    ProgramSharedPtr program;
    /// Registry the shader IR was generated against
    std::shared_ptr<VideoCommon::Shader::Registry> registry;
    /// Resource usage of the shader, as returned by MakeEntries
    ShaderEntries entries;
 };
 /// Bundle of references and values passed to the Shader factory functions.
 /// It is initialized positionally by the shader cache, so the member order must not change.
 struct ShaderParameters {
    Tegra::GPU& gpu; ///< Used to notify shader build progress
    /// Engine the registry is created for (the cache passes Maxwell3D or KeplerCompute)
    Tegra::Engines::ConstBufferEngineInterface& engine;
    ShaderDiskCacheOpenGL& disk_cache; ///< Disk cache newly built shaders are saved to
    const Device& device;
    VAddr cpu_addr;        ///< CPU address of the shader code
    const u8* host_ptr;    ///< Host pointer to the shader code
    u64 unique_identifier; ///< Identifier the shader is cached under
 };
 /// Builds a program for the given shader IR and returns a shared handle to it.
 /// NOTE(review): hint_retrievable is passed as true when loading the disk cache, where program
 /// binaries are dumped afterwards; presumably it requests a retrievable binary - confirm.
 ProgramSharedPtr BuildShader(const Device& device, Tegra::Engines::ShaderType shader_type,
                             u64 unique_identifier, const VideoCommon::Shader::ShaderIR& ir,
                             const VideoCommon::Shader::Registry& registry,
                             bool hint_retrievable = false);
 /// A guest shader (graphics stage or compute kernel) together with its host program.
 /// Instances are created through the static Create* factory functions.
 class Shader final {
 public:
    explicit Shader();
    ~Shader();
    /// Gets the GL program handle for the shader
    GLuint GetHandle() const;
    /// Returns whether the shader is built
    bool IsBuilt() const;
    /// Gets the shader entries for the shader
    const ShaderEntries& GetEntries() const {
        return entries;
    }
    /// Gets the registry the shader was built with
    const VideoCommon::Shader::Registry& GetRegistry() const {
        return *registry;
    }
    /// Mark a OpenGL shader as built
    void AsyncOpenGLBuilt(OGLProgram new_program);
    /// Mark a GLASM shader as built
    void AsyncGLASMBuilt(OGLAssemblyProgram new_program);
    /// Creates a graphics stage shader from guest code (program_code_b holds the second program
    /// when one is paired with it)
    static std::unique_ptr<Shader> CreateStageFromMemory(
        const ShaderParameters& params, Maxwell::ShaderProgram program_type,
        ProgramCode program_code, ProgramCode program_code_b,
        VideoCommon::Shader::AsyncShaders& async_shaders, VAddr cpu_addr);
    /// Creates a compute kernel from guest code
    static std::unique_ptr<Shader> CreateKernelFromMemory(const ShaderParameters& params,
                                                          ProgramCode code);
    /// Creates a shader from an already built entry of the runtime cache
    static std::unique_ptr<Shader> CreateFromCache(const ShaderParameters& params,
                                                   const PrecompiledShader& precompiled_shader);
 private:
    /// Used by the factory functions; is_built_ is false for shaders still being built
    explicit Shader(std::shared_ptr<VideoCommon::Shader::Registry> registry, ShaderEntries entries,
                    ProgramSharedPtr program, bool is_built_ = true);
    std::shared_ptr<VideoCommon::Shader::Registry> registry;
    ShaderEntries entries;
    ProgramSharedPtr program;
    GLuint handle = 0;
    bool is_built{};
 };
 class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
@ -126,36 +51,13 @@ public:
                               Tegra::MemoryManager& gpu_memory_, const Device& device_);
    ~ShaderCacheOpenGL() override;
    /// Loads disk cache for the current game
    void LoadDiskCache(u64 title_id, std::stop_token stop_loading,
                       const VideoCore::DiskResourceLoadCallback& callback);
    /// Gets the current specified shader stage program
    Shader* GetStageProgram(Maxwell::ShaderProgram program,
                            VideoCommon::Shader::AsyncShaders& async_shaders);
    /// Gets a compute kernel in the passed address
    Shader* GetComputeKernel(GPUVAddr code_addr);
 private:
    ProgramSharedPtr GeneratePrecompiledProgram(
        const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
        const std::unordered_set<GLenum>& supported_formats);
    Core::Frontend::EmuWindow& emu_window;
    Tegra::GPU& gpu;
    Tegra::MemoryManager& gpu_memory;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
    const Device& device;
    ShaderDiskCacheOpenGL disk_cache;
    std::unordered_map<u64, PrecompiledShader> runtime_cache;
    std::unique_ptr<Shader> null_shader;
    std::unique_ptr<Shader> null_kernel;
    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
 };
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@ -1,69 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <array>
 #include <string>
 #include <string_view>
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 namespace OpenGL {
 class Device;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using SamplerEntry = VideoCommon::Shader::SamplerEntry;
 using ImageEntry = VideoCommon::Shader::ImageEntry;
 /// A VideoCommon::Shader::ConstBuffer extended with the index it was declared with.
 class ConstBufferEntry : public VideoCommon::Shader::ConstBuffer {
 public:
    /// @param max_offset_  Forwarded to the ConstBuffer base
    /// @param is_indirect_ Forwarded to the ConstBuffer base
    /// @param index_       Index stored in this entry
    explicit ConstBufferEntry(u32 max_offset_, bool is_indirect_, u32 index_)
        : ConstBuffer{max_offset_, is_indirect_}, index{index_} {}
    /// Returns the index given at construction
    u32 GetIndex() const {
        return index;
    }
 private:
    u32 index = 0;
 };
 /// Describes one global memory access of a shader.
 /// NOTE(review): cbuf_index/cbuf_offset presumably locate the region's descriptor inside a
 /// constant buffer - confirm against the decompiler.
 struct GlobalMemoryEntry {
    constexpr explicit GlobalMemoryEntry(u32 cbuf_index_, u32 cbuf_offset_, bool is_read_,
                                         bool is_written_)
        : cbuf_index{cbuf_index_},
          cbuf_offset{cbuf_offset_},
          is_read{is_read_},
          is_written{is_written_} {}

    u32 cbuf_index{};
    u32 cbuf_offset{};
    bool is_read{};
    bool is_written{};
 };
 /// Resources used by a shader, as produced by MakeEntries.
 struct ShaderEntries {
    std::vector<ConstBufferEntry> const_buffers;
    std::vector<GlobalMemoryEntry> global_memory_entries;
    std::vector<SamplerEntry> samplers;
    std::vector<ImageEntry> images;
    std::size_t shader_length{};
    u32 clip_distances{};          // NOTE(review): presumably a bitmask - confirm
    u32 enabled_uniform_buffers{}; // NOTE(review): presumably a bitmask - confirm
 };
 /// Collects the resources used by a shader IR for the given stage.
 ShaderEntries MakeEntries(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                           Tegra::Engines::ShaderType stage);
 /// Decompiles a shader IR into source code returned as a string.
 /// NOTE(review): identifier and suffix presumably name the generated entry point - confirm.
 std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
                             const VideoCommon::Shader::Registry& registry,
                             Tegra::Engines::ShaderType stage, std::string_view identifier,
                             std::string_view suffix = {});
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@ -1,482 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <cstring>
 #include <fmt/format.h>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/fs/file.h"
 #include "common/fs/fs.h"
 #include "common/fs/path_util.h"
 #include "common/logging/log.h"
 #include "common/scm_rev.h"
 #include "common/settings.h"
 #include "common/zstd_compression.h"
 #include "core/core.h"
 #include "core/hle/kernel/k_process.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 namespace OpenGL {
 using Tegra::Engines::ShaderType;
 using VideoCommon::Shader::BindlessSamplerMap;
 using VideoCommon::Shader::BoundSamplerMap;
 using VideoCommon::Shader::KeyMap;
 using VideoCommon::Shader::SeparateSamplerKey;
 using ShaderCacheVersionHash = std::array<u8, 64>;
 /// Flat record of one registry key ((cbuf, offset) -> value) as stored in the transferable file.
 /// Records are read and written in bulk by ShaderDiskCacheEntry::Load/Save, so changing the
 /// layout presumably changes the file format.
 struct ConstBufferKey {
    u32 cbuf = 0;
    u32 offset = 0;
    u32 value = 0;
 };
 /// Flat record of one bound sampler (offset -> descriptor) as stored in the transferable file.
 struct BoundSamplerEntry {
    u32 offset = 0;
    Tegra::Engines::SamplerDescriptor sampler;
 };
 /// Flat record of one separate sampler as stored in the transferable file.
 /// (cbuf1, cbuf2) and (offset1, offset2) map to SeparateSamplerKey::buffers and ::offsets.
 struct SeparateSamplerEntry {
    u32 cbuf1 = 0;
    u32 cbuf2 = 0;
    u32 offset1 = 0;
    u32 offset2 = 0;
    Tegra::Engines::SamplerDescriptor sampler;
 };
 /// Flat record of one bindless sampler ((cbuf, offset) -> descriptor) as stored in the
 /// transferable file.
 struct BindlessSamplerEntry {
    u32 cbuf = 0;
    u32 offset = 0;
    Tegra::Engines::SamplerDescriptor sampler;
 };
 namespace {
 constexpr u32 NativeVersion = 21;
 /// Returns the shader cache version string packed into a zero-filled 64-byte array.
 /// Strings longer than the array are truncated.
 ShaderCacheVersionHash GetShaderCacheVersionHash() {
    ShaderCacheVersionHash version_hash{};
    std::size_t bytes_to_copy = std::strlen(Common::g_shader_cache_version);
    if (version_hash.size() < bytes_to_copy) {
        bytes_to_copy = version_hash.size();
    }
    std::memcpy(version_hash.data(), Common::g_shader_cache_version, bytes_to_copy);
    return version_hash;
 }
 } // Anonymous namespace
 ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
 ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
 /// Reads one entry from the transferable file at its current position.
 /// The read order mirrors ShaderDiskCacheEntry::Save and defines the on-disk format.
 /// @return false as soon as any read comes up short; the entry may then be partially filled
 bool ShaderDiskCacheEntry::Load(Common::FS::IOFile& file) {
    if (!file.ReadObject(type)) {
        return false;
    }
    // Element counts of both code blobs come first
    u32 code_size;
    u32 code_size_b;
    if (!file.ReadObject(code_size) || !file.ReadObject(code_size_b)) {
        return false;
    }
    code.resize(code_size);
    code_b.resize(code_size_b);
    if (file.Read(code) != code_size) {
        return false;
    }
    // The second blob is only present when both programs are non-empty
    if (HasProgramA() && file.Read(code_b) != code_size_b) {
        return false;
    }
    u8 is_texture_handler_size_known;
    u32 texture_handler_size_value;
    u32 num_keys;
    u32 num_bound_samplers;
    u32 num_separate_samplers;
    u32 num_bindless_samplers;
    if (!file.ReadObject(unique_identifier) || !file.ReadObject(bound_buffer) ||
        !file.ReadObject(is_texture_handler_size_known) ||
        !file.ReadObject(texture_handler_size_value) || !file.ReadObject(graphics_info) ||
        !file.ReadObject(compute_info) || !file.ReadObject(num_keys) ||
        !file.ReadObject(num_bound_samplers) || !file.ReadObject(num_separate_samplers) ||
        !file.ReadObject(num_bindless_samplers)) {
        return false;
    }
    // The optional is stored as a flag followed by a value that is always present
    if (is_texture_handler_size_known) {
        texture_handler_size = texture_handler_size_value;
    }
    // The maps are stored as flat arrays of records and rebuilt below
    std::vector<ConstBufferKey> flat_keys(num_keys);
    std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
    std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
    std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
    if (file.Read(flat_keys) != flat_keys.size() ||
        file.Read(flat_bound_samplers) != flat_bound_samplers.size() ||
        file.Read(flat_separate_samplers) != flat_separate_samplers.size() ||
        file.Read(flat_bindless_samplers) != flat_bindless_samplers.size()) {
        return false;
    }
    for (const auto& entry : flat_keys) {
        keys.insert({{entry.cbuf, entry.offset}, entry.value});
    }
    for (const auto& entry : flat_bound_samplers) {
        bound_samplers.emplace(entry.offset, entry.sampler);
    }
    for (const auto& entry : flat_separate_samplers) {
        SeparateSamplerKey key;
        key.buffers = {entry.cbuf1, entry.cbuf2};
        key.offsets = {entry.offset1, entry.offset2};
        separate_samplers.emplace(key, entry.sampler);
    }
    for (const auto& entry : flat_bindless_samplers) {
        bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
    }
    return true;
 }
 /// Writes this entry to the transferable file at its current position.
 /// The write order mirrors ShaderDiskCacheEntry::Load and defines the on-disk format.
 /// @return false as soon as any write comes up short
 bool ShaderDiskCacheEntry::Save(Common::FS::IOFile& file) const {
    // Header: shader type followed by the element counts of both code blobs
    if (!file.WriteObject(static_cast<u32>(type)) ||
        !file.WriteObject(static_cast<u32>(code.size())) ||
        !file.WriteObject(static_cast<u32>(code_b.size()))) {
        return false;
    }
    if (file.Write(code) != code.size()) {
        return false;
    }
    // The second blob is only written when both programs are non-empty
    if (HasProgramA() && file.Write(code_b) != code_b.size()) {
        return false;
    }
    // The optional texture handler size is stored as a flag plus a value (0 when unknown)
    if (!file.WriteObject(unique_identifier) || !file.WriteObject(bound_buffer) ||
        !file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) ||
        !file.WriteObject(texture_handler_size.value_or(0)) || !file.WriteObject(graphics_info) ||
        !file.WriteObject(compute_info) || !file.WriteObject(static_cast<u32>(keys.size())) ||
        !file.WriteObject(static_cast<u32>(bound_samplers.size())) ||
        !file.WriteObject(static_cast<u32>(separate_samplers.size())) ||
        !file.WriteObject(static_cast<u32>(bindless_samplers.size()))) {
        return false;
    }
    // Flatten every map into an array of records before writing it
    std::vector<ConstBufferKey> flat_keys;
    flat_keys.reserve(keys.size());
    for (const auto& [address, value] : keys) {
        flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
    }
    std::vector<BoundSamplerEntry> flat_bound_samplers;
    flat_bound_samplers.reserve(bound_samplers.size());
    for (const auto& [address, sampler] : bound_samplers) {
        flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
    }
    std::vector<SeparateSamplerEntry> flat_separate_samplers;
    flat_separate_samplers.reserve(separate_samplers.size());
    for (const auto& [key, sampler] : separate_samplers) {
        SeparateSamplerEntry entry;
        std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
        std::tie(entry.offset1, entry.offset2) = key.offsets;
        entry.sampler = sampler;
        flat_separate_samplers.push_back(entry);
    }
    std::vector<BindlessSamplerEntry> flat_bindless_samplers;
    flat_bindless_samplers.reserve(bindless_samplers.size());
    for (const auto& [address, sampler] : bindless_samplers) {
        flat_bindless_samplers.push_back(
            BindlessSamplerEntry{address.first, address.second, sampler});
    }
    return file.Write(flat_keys) == flat_keys.size() &&
           file.Write(flat_bound_samplers) == flat_bound_samplers.size() &&
           file.Write(flat_separate_samplers) == flat_separate_samplers.size() &&
           file.Write(flat_bindless_samplers) == flat_bindless_samplers.size();
 }
 ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL() = default;
 ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
 /// Stores the title id that the cache file names are derived from.
 void ShaderDiskCacheOpenGL::BindTitleID(u64 title_id_) {
    title_id = title_id_;
 }
 std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
    // Skip games without title id
    const bool has_title_id = title_id != 0;
    if (!Settings::values.use_disk_shader_cache.GetValue() || !has_title_id) {
        return std::nullopt;
    }
    Common::FS::IOFile file{GetTransferablePath(), Common::FS::FileAccessMode::Read,
                            Common::FS::FileType::BinaryFile};
    if (!file.IsOpen()) {
        LOG_INFO(Render_OpenGL, "No transferable shader cache found");
        is_usable = true;
        return std::nullopt;
    }
    u32 version{};
    if (!file.ReadObject(version)) {
        LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
        return std::nullopt;
    }
    if (version < NativeVersion) {
        LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
        file.Close();
        InvalidateTransferable();
        is_usable = true;
        return std::nullopt;
    }
    if (version > NativeVersion) {
        LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
                                   "of the emulator, skipping");
        return std::nullopt;
    }
    // Version is valid, load the shaders
    std::vector<ShaderDiskCacheEntry> entries;
    while (static_cast<u64>(file.Tell()) < file.GetSize()) {
        ShaderDiskCacheEntry& entry = entries.emplace_back();
        if (!entry.Load(file)) {
            LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
            return std::nullopt;
        }
    }
    is_usable = true;
    return {std::move(entries)};
 }
 /// Loads the precompiled cache of the bound title.
 /// Returns an empty list when the cache is not usable, the file is missing, or the file could
 /// not be parsed; in the last case the precompiled cache is invalidated.
 std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
    std::vector<ShaderDiskCachePrecompiled> entries;
    if (!is_usable) {
        return entries;
    }

    Common::FS::IOFile file{GetPrecompiledPath(), Common::FS::FileAccessMode::Read,
                            Common::FS::FileType::BinaryFile};
    if (!file.IsOpen()) {
        LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
        return entries;
    }

    const auto parsed = LoadPrecompiledFile(file);
    if (parsed) {
        entries = *parsed;
        return entries;
    }

    LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
    file.Close();
    InvalidatePrecompiled();
    return entries;
 }
 /// Decompresses the precompiled cache file into the virtual file and parses it.
 ///
 /// The virtual file starts with the shader cache version hash, followed by records of
 /// (unique identifier, binary format, binary size, binary data).
 /// @return the parsed entries, or std::nullopt when reading fails or the hash differs
 std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
    Common::FS::IOFile& file) {
    // Read compressed file from disk and decompress to virtual precompiled cache file
    std::vector<u8> compressed(file.GetSize());
    if (file.Read(compressed) != file.GetSize()) {
        return std::nullopt;
    }
    const std::vector<u8> decompressed = Common::Compression::DecompressDataZSTD(compressed);
    // NOTE(review): the result of this write is not checked
    SaveArrayToPrecompiled(decompressed.data(), decompressed.size());
    // Rewind the virtual file cursor so parsing starts at the header
    precompiled_cache_virtual_file_offset = 0;
    ShaderCacheVersionHash file_hash{};
    if (!LoadArrayFromPrecompiled(file_hash.data(), file_hash.size())) {
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }
    if (GetShaderCacheVersionHash() != file_hash) {
        LOG_INFO(Render_OpenGL, "Precompiled cache is from another version of the emulator");
        precompiled_cache_virtual_file_offset = 0;
        return std::nullopt;
    }
    std::vector<ShaderDiskCachePrecompiled> entries;
    while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
        u32 binary_size;
        auto& entry = entries.emplace_back();
        if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
            !LoadObjectFromPrecompiled(entry.binary_format) ||
            !LoadObjectFromPrecompiled(binary_size)) {
            return std::nullopt;
        }
        entry.binary.resize(binary_size);
        if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
            return std::nullopt;
        }
    }
    return entries;
 }
 /// Deletes the transferable cache file and, since it depends on it, the precompiled cache.
 void ShaderDiskCacheOpenGL::InvalidateTransferable() {
    const bool removed = Common::FS::RemoveFile(GetTransferablePath());
    if (!removed) {
        LOG_ERROR(Render_OpenGL, "Failed to invalidate transferable file={}",
                  Common::FS::PathToUTF8String(GetTransferablePath()));
    }
    // The precompiled cache is dropped whether or not the removal succeeded
    InvalidatePrecompiled();
 }
 /// Empties the virtual precompiled cache file and deletes the precompiled file on disk.
 void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
    // Clear virtual precompiled cache file
    precompiled_cache_virtual_file.Resize(0);

    const bool removed = Common::FS::RemoveFile(GetPrecompiledPath());
    if (removed) {
        return;
    }
    LOG_ERROR(Render_OpenGL, "Failed to invalidate precompiled file={}",
              Common::FS::PathToUTF8String(GetPrecompiledPath()));
 }
 void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
    if (!is_usable) {
        return;
    }
    const u64 id = entry.unique_identifier;
    if (stored_transferable.contains(id)) {
        // The shader already exists
        return;
    }
    Common::FS::IOFile file = AppendTransferableFile();
    if (!file.IsOpen()) {
        return;
    }
    if (!entry.Save(file)) {
        LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
        file.Close();
        InvalidateTransferable();
        return;
    }
    stored_transferable.insert(id);
 }
 /// Dumps the driver binary of a program into the virtual precompiled cache file.
 ///
 /// The record written is (unique identifier, binary format, binary size, binary data). A failed
 /// write invalidates the precompiled cache.
 ///
 /// @param unique_identifier Identifier the binary is stored under
 /// @param program           GL program whose binary is retrieved
 void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
    if (!is_usable) {
        return;
    }
    // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
    // when writing the dump. This should be done the moment I get access to write to the virtual
    // file.
    if (precompiled_cache_virtual_file.GetSize() == 0) {
        SavePrecompiledHeaderToVirtualPrecompiledCache();
    }
    // Zero-initialized because glGetProgramiv leaves its output untouched when it raises an error
    GLint binary_length = 0;
    glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
    if (binary_length <= 0) {
        // No binary is available; storing an empty one would only be rejected on the next load
        LOG_ERROR(Render_OpenGL, "Failed to query binary program length in shader={:016X}",
                  unique_identifier);
        return;
    }
    GLenum binary_format = 0;
    std::vector<u8> binary(static_cast<std::size_t>(binary_length));
    glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
    if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
        !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
                  unique_identifier);
        InvalidatePrecompiled();
    }
 }
 /// Opens the transferable cache file for appending, creating the cache directories first and
 /// writing the version header when the file is new or empty.
 /// Returns a default-constructed file on failure.
 Common::FS::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
    if (!EnsureDirectories()) {
        return {};
    }

    const auto path{GetTransferablePath()};
    // Must be queried before the file is opened in append mode
    const bool was_present = Common::FS::Exists(path);
    Common::FS::IOFile output{path, Common::FS::FileAccessMode::Append,
                              Common::FS::FileType::BinaryFile};
    if (!output.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open transferable cache in path={}",
                  Common::FS::PathToUTF8String(path));
        return {};
    }

    // A file that did not exist or is empty still needs its version header
    const bool needs_version = !was_present || output.GetSize() == 0;
    if (needs_version && !output.WriteObject(NativeVersion)) {
        LOG_ERROR(Render_OpenGL, "Failed to write transferable cache version in path={}",
                  Common::FS::PathToUTF8String(path));
        return {};
    }
    return output;
 }
 void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
    const auto hash{GetShaderCacheVersionHash()};
    if (!SaveArrayToPrecompiled(hash.data(), hash.size())) {
        LOG_ERROR(
            Render_OpenGL,
            "Failed to write precompiled cache version hash to virtual precompiled cache file");
    }
 }
 void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
    precompiled_cache_virtual_file_offset = 0;
    const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
    const std::vector<u8> compressed =
        Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
    const auto precompiled_path = GetPrecompiledPath();
    Common::FS::IOFile file{precompiled_path, Common::FS::FileAccessMode::Write,
                            Common::FS::FileType::BinaryFile};
    if (!file.IsOpen()) {
        LOG_ERROR(Render_OpenGL, "Failed to open precompiled cache in path={}",
                  Common::FS::PathToUTF8String(precompiled_path));
        return;
    }
    if (file.Write(compressed) != compressed.size()) {
        LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
                  Common::FS::PathToUTF8String(precompiled_path));
    }
 }
 /// Creates the shader directory, the OpenGL base directory and both cache directories.
 /// Stops at the first directory that cannot be created and returns false.
 bool ShaderDiskCacheOpenGL::EnsureDirectories() const {
    const auto make_dir = [](const std::filesystem::path& dir) {
        const bool created = Common::FS::CreateDir(dir);
        if (!created) {
            LOG_ERROR(Render_OpenGL, "Failed to create directory={}",
                      Common::FS::PathToUTF8String(dir));
        }
        return created;
    };

    if (!make_dir(Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir))) {
        return false;
    }
    if (!make_dir(GetBaseDir())) {
        return false;
    }
    if (!make_dir(GetTransferableDir())) {
        return false;
    }
    return make_dir(GetPrecompiledDir());
 }
 /// Path of the transferable cache file: "<title id>.bin" inside the transferable directory.
 std::filesystem::path ShaderDiskCacheOpenGL::GetTransferablePath() const {
    std::filesystem::path file_path = GetTransferableDir();
    file_path /= fmt::format("{}.bin", GetTitleID());
    return file_path;
 }
 /// Path of the precompiled cache file: "<title id>.bin" inside the precompiled directory.
 std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledPath() const {
    std::filesystem::path file_path = GetPrecompiledDir();
    file_path /= fmt::format("{}.bin", GetTitleID());
    return file_path;
 }
 /// Directory that holds the transferable cache files.
 std::filesystem::path ShaderDiskCacheOpenGL::GetTransferableDir() const {
    std::filesystem::path dir = GetBaseDir();
    dir /= "transferable";
    return dir;
 }
 /// Directory that holds the precompiled cache files.
 std::filesystem::path ShaderDiskCacheOpenGL::GetPrecompiledDir() const {
    std::filesystem::path dir = GetBaseDir();
    dir /= "precompiled";
    return dir;
 }
 /// Base directory of the OpenGL shader caches inside the shader directory.
 std::filesystem::path ShaderDiskCacheOpenGL::GetBaseDir() const {
    std::filesystem::path dir = Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir);
    dir /= "opengl";
    return dir;
 }
 /// Returns the bound title id as 16 uppercase hexadecimal digits, zero padded.
 std::string ShaderDiskCacheOpenGL::GetTitleID() const {
    return fmt::format("{:016X}", title_id);
 }
 } // namespace OpenGL
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@ -1,176 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <filesystem>
 #include <optional>
 #include <string>
 #include <tuple>
 #include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
 #include <vector>
 #include <glad/glad.h>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "core/file_sys/vfs_vector.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/shader/registry.h"
 namespace Common::FS {
 class IOFile;
 }
 namespace OpenGL {
 using ProgramCode = std::vector<u64>;
 /// Describes a shader and how it's used by the guest GPU
 struct ShaderDiskCacheEntry {
    ShaderDiskCacheEntry();
    ~ShaderDiskCacheEntry();
    /// Reads this entry from the given file; the bool result reports the outcome
    /// (presumably true on success — confirm against the definition).
    bool Load(Common::FS::IOFile& file);
    /// Writes this entry to the given file; the bool result reports the outcome
    /// (presumably true on success — confirm against the definition).
    bool Save(Common::FS::IOFile& file) const;
    /// True only when both `code` and `code_b` are non-empty.
    bool HasProgramA() const {
        return !code.empty() && !code_b.empty();
    }
    // Shader stage type this entry was recorded for.
    Tegra::Engines::ShaderType type{};
    // Primary program code, stored as 64-bit words.
    ProgramCode code;
    // Second program code; HasProgramA() requires it to be non-empty.
    ProgramCode code_b;
    // Identifier of the shader. NOTE(review): presumably the same identifier used by
    // ShaderDiskCachePrecompiled — confirm.
    u64 unique_identifier = 0;
    std::optional<u32> texture_handler_size;
    u32 bound_buffer = 0;
    VideoCommon::Shader::GraphicsInfo graphics_info;
    VideoCommon::Shader::ComputeInfo compute_info;
    VideoCommon::Shader::KeyMap keys;
    VideoCommon::Shader::BoundSamplerMap bound_samplers;
    VideoCommon::Shader::SeparateSamplerMap separate_samplers;
    VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
 };
 /// Contains an OpenGL dumped binary program
 struct ShaderDiskCachePrecompiled {
    // Identifier of the shader this binary belongs to.
    u64 unique_identifier = 0;
    // OpenGL enum describing the format of `binary`.
    GLenum binary_format = 0;
    // Raw bytes of the dumped program.
    std::vector<u8> binary;
 };
 /// Disk cache for OpenGL shaders of the currently bound title. It handles a transferable
 /// cache file and a precompiled cache file, the latter staged in an in-memory virtual file.
 class ShaderDiskCacheOpenGL {
 public:
    explicit ShaderDiskCacheOpenGL();
    ~ShaderDiskCacheOpenGL();
    /// Binds a title ID for all future operations.
    void BindTitleID(u64 title_id);
    /// Loads the transferable cache. If the file has an old version or loading fails, the file
    /// is deleted.
    std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
    /// Loads current game's precompiled cache. Invalidates on failure.
    std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
    /// Removes the transferable (and precompiled) cache file.
    void InvalidateTransferable();
    /// Removes the precompiled cache file and clears virtual precompiled cache file.
    void InvalidatePrecompiled();
    /// Saves a raw dump to the transferable file. Checks for collisions.
    void SaveEntry(const ShaderDiskCacheEntry& entry);
    /// Saves a dump entry to the precompiled file. Does not check for collisions.
    void SavePrecompiled(u64 unique_identifier, GLuint program);
    /// Serializes virtual precompiled shader cache file to real file
    void SaveVirtualPrecompiledFile();
 private:
    /// Loads the precompiled cache from the given file. Returns empty on failure.
    std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
        Common::FS::IOFile& file);
    /// Opens current game's transferable file and writes its header if it doesn't exist
    Common::FS::IOFile AppendTransferableFile() const;
    /// Save precompiled header to precompiled_cache_in_memory
    void SavePrecompiledHeaderToVirtualPrecompiledCache();
    /// Create shader disk cache directories. Returns true on success.
    bool EnsureDirectories() const;
    /// Gets current game's transferable file path
    std::filesystem::path GetTransferablePath() const;
    /// Gets current game's precompiled file path
    std::filesystem::path GetPrecompiledPath() const;
    /// Get user's transferable directory path
    std::filesystem::path GetTransferableDir() const;
    /// Get user's precompiled directory path
    std::filesystem::path GetPrecompiledDir() const;
    /// Get user's shader directory path
    std::filesystem::path GetBaseDir() const;
    /// Get current game's title id
    std::string GetTitleID() const;
    /// Writes `length` elements of T to the virtual precompiled file at the current offset and
    /// advances the offset by the amount written. Returns true when the full array was written.
    template <typename T>
    bool SaveArrayToPrecompiled(const T* data, std::size_t length) {
        const std::size_t write_length = precompiled_cache_virtual_file.WriteArray(
            data, length, precompiled_cache_virtual_file_offset);
        precompiled_cache_virtual_file_offset += write_length;
        return write_length == sizeof(T) * length;
    }
    /// Reads `length` elements of T from the virtual precompiled file at the current offset and
    /// advances the offset by the amount read. Returns true when the full array was read.
    template <typename T>
    bool LoadArrayFromPrecompiled(T* data, std::size_t length) {
        const std::size_t read_length = precompiled_cache_virtual_file.ReadArray(
            data, length, precompiled_cache_virtual_file_offset);
        precompiled_cache_virtual_file_offset += read_length;
        return read_length == sizeof(T) * length;
    }
    /// Saves a single object by treating it as an array of one element.
    template <typename T>
    bool SaveObjectToPrecompiled(const T& object) {
        return SaveArrayToPrecompiled(&object, 1);
    }
    /// Saves a bool as a single u8 value.
    bool SaveObjectToPrecompiled(bool object) {
        const auto value = static_cast<u8>(object);
        return SaveArrayToPrecompiled(&value, 1);
    }
    /// Loads a single object by treating it as an array of one element.
    template <typename T>
    bool LoadObjectFromPrecompiled(T& object) {
        return LoadArrayFromPrecompiled(&object, 1);
    }
    // Stores the whole precompiled cache, which will be read from or saved to the precompiled
    // cache file
    FileSys::VectorVfsFile precompiled_cache_virtual_file;
    // Stores the current offset of the precompiled cache file for IO purposes
    std::size_t precompiled_cache_virtual_file_offset = 0;
    // Stored transferable shaders
    std::unordered_set<u64> stored_transferable;
    /// Title ID to operate on
    u64 title_id = 0;
    // The cache has been loaded at boot
    bool is_usable = false;
 };
 } // namespace OpenGL
--- a/src/video_core/renderer_vulkan/blit_image.cpp
+++ b/src/video_core/renderer_vulkan/blit_image.cpp
@ -323,7 +323,6 @@ void BindBlitState(vk::CommandBuffer cmdbuf, VkPipelineLayout layout, const Regi
    cmdbuf.SetScissor(0, scissor);
    cmdbuf.PushConstants(layout, VK_SHADER_STAGE_VERTEX_BIT, push_constants);
 }
 } // Anonymous namespace
 BlitImageHelper::BlitImageHelper(const Device& device_, VKScheduler& scheduler_,
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@ -8,146 +8,14 @@
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 namespace Vulkan {
-VKComputePipeline::VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
+ComputePipeline::ComputePipeline() = default;
                                     VKDescriptorPool& descriptor_pool_,
                                     VKUpdateDescriptorQueue& update_descriptor_queue_,
                                     const SPIRVShader& shader_)
    : device{device_}, scheduler{scheduler_}, entries{shader_.entries},
      descriptor_set_layout{CreateDescriptorSetLayout()},
      descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
      update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
      descriptor_template{CreateDescriptorUpdateTemplate()},
      shader_module{CreateShaderModule(shader_.code)}, pipeline{CreatePipeline()} {}
-VKComputePipeline::~VKComputePipeline() = default;
+ComputePipeline::~ComputePipeline() = default;
 /// Commits a descriptor set from the allocator and sends it, together with the descriptor
 /// update template, to the update descriptor queue. When the pipeline has no template, an
 /// empty handle is returned and nothing is committed.
 VkDescriptorSet VKComputePipeline::CommitDescriptorSet() {
    if (descriptor_template) {
        const VkDescriptorSet committed_set = descriptor_allocator.Commit();
        update_descriptor_queue.Send(*descriptor_template, committed_set);
        return committed_set;
    }
    return {};
 }
 /// Creates the descriptor set layout for the compute stage. One binding with a single
 /// descriptor is emitted per shader entry, numbered consecutively in this order: const
 /// buffers, global buffers, uniform texels, samplers, storage texels, images.
 vk::DescriptorSetLayout VKComputePipeline::CreateDescriptorSetLayout() const {
    std::vector<VkDescriptorSetLayoutBinding> layout_bindings;
    u32 next_binding = 0;
    const auto append = [&layout_bindings, &next_binding](VkDescriptorType type,
                                                          std::size_t count) {
        // TODO(Rodrigo): Maybe make individual bindings here?
        const u32 total = static_cast<u32>(count);
        for (u32 i = 0; i < total; ++i) {
            const VkDescriptorSetLayoutBinding entry{
                .binding = next_binding++,
                .descriptorType = type,
                .descriptorCount = 1,
                .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                .pImmutableSamplers = nullptr,
            };
            layout_bindings.push_back(entry);
        }
    };
    append(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, entries.const_buffers.size());
    append(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, entries.global_buffers.size());
    append(VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, entries.uniform_texels.size());
    append(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, entries.samplers.size());
    append(VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, entries.storage_texels.size());
    append(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, entries.images.size());

    const VkDescriptorSetLayoutCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = static_cast<u32>(layout_bindings.size()),
        .pBindings = layout_bindings.data(),
    };
    return device.GetLogical().CreateDescriptorSetLayout(ci);
 }
 /// Creates a pipeline layout that references this pipeline's single descriptor set layout
 /// and declares no push constant ranges.
 vk::PipelineLayout VKComputePipeline::CreatePipelineLayout() const {
    const VkPipelineLayoutCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .setLayoutCount = 1,
        .pSetLayouts = descriptor_set_layout.address(),
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr,
    };
    return device.GetLogical().CreatePipelineLayout(ci);
 }
 /// Creates the descriptor update template for the compute shader's entries.
 /// The template entries are produced by FillDescriptorUpdateTemplateEntries; when that yields
 /// no entries an empty template handle is returned and no Vulkan object is created.
 vk::DescriptorUpdateTemplateKHR VKComputePipeline::CreateDescriptorUpdateTemplate() const {
    std::vector<VkDescriptorUpdateTemplateEntryKHR> template_entries;
    u32 binding = 0;
    u32 offset = 0;
    FillDescriptorUpdateTemplateEntries(entries, binding, offset, template_entries);
    if (template_entries.empty()) {
        // If the shader doesn't use descriptor sets, skip template creation.
        return {};
    }
    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
        .pNext = nullptr,
        .flags = 0,
        .descriptorUpdateEntryCount = static_cast<u32>(template_entries.size()),
        .pDescriptorUpdateEntries = template_entries.data(),
        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
        .descriptorSetLayout = *descriptor_set_layout,
        // This template belongs to a compute pipeline, so declare the compute bind point.
        // Vulkan only consults this field for push-descriptor templates, which makes this a
        // consistency fix rather than a change in runtime behavior.
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_COMPUTE,
        .pipelineLayout = *layout,
        .set = DESCRIPTOR_SET,
    });
 }
 /// Hands the SPIR-V words to device.SaveShader and then creates a shader module from them.
 /// `codeSize` is expressed in bytes, hence the multiplication by sizeof(u32).
 vk::ShaderModule VKComputePipeline::CreateShaderModule(const std::vector<u32>& code) const {
    device.SaveShader(code);
    const VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = code.size() * sizeof(u32),
        .pCode = code.data(),
    };
    return device.GetLogical().CreateShaderModule(ci);
 }
 /// Creates the compute pipeline using the shader module's "main" entry point and this
 /// pipeline's layout. When the shader uses warps and the device supports the guest warp size
 /// for compute stages, a required-subgroup-size structure is chained to the stage.
 vk::Pipeline VKComputePipeline::CreatePipeline() const {
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .requiredSubgroupSize = GuestWarpSize,
    };
    VkComputePipelineCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stage =
            {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .pNext = nullptr,
                .flags = 0,
                .stage = VK_SHADER_STAGE_COMPUTE_BIT,
                .module = *shader_module,
                .pName = "main",
                .pSpecializationInfo = nullptr,
            },
        .layout = *layout,
        .basePipelineHandle = nullptr,
        .basePipelineIndex = 0,
    };
    const bool force_guest_warp_size =
        entries.uses_warps && device.IsGuestWarpSizeSupported(VK_SHADER_STAGE_COMPUTE_BIT);
    if (force_guest_warp_size) {
        // subgroup_size_ci lives until the end of this function, past pipeline creation.
        ci.stage.pNext = &subgroup_size_ci;
    }
    return device.GetLogical().CreateComputePipeline(ci);
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_compute_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.h
@ -6,7 +6,6 @@
 #include "common/common_types.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 namespace Vulkan {
@ -15,50 +14,10 @@ class Device;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
-class VKComputePipeline final {
+class ComputePipeline {
 public:
-    explicit VKComputePipeline(const Device& device_, VKScheduler& scheduler_,
+    explicit ComputePipeline();
-                               VKDescriptorPool& descriptor_pool_,
+    ~ComputePipeline();
                               VKUpdateDescriptorQueue& update_descriptor_queue_,
                               const SPIRVShader& shader_);
    ~VKComputePipeline();
    VkDescriptorSet CommitDescriptorSet();
    VkPipeline GetHandle() const {
        return *pipeline;
    }
    VkPipelineLayout GetLayout() const {
        return *layout;
    }
    const ShaderEntries& GetEntries() const {
        return entries;
    }
 private:
    vk::DescriptorSetLayout CreateDescriptorSetLayout() const;
    vk::PipelineLayout CreatePipelineLayout() const;
    vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate() const;
    vk::ShaderModule CreateShaderModule(const std::vector<u32>& code) const;
    vk::Pipeline CreatePipeline() const;
    const Device& device;
    VKScheduler& scheduler;
    ShaderEntries entries;
    vk::DescriptorSetLayout descriptor_set_layout;
    DescriptorAllocator descriptor_allocator;
    VKUpdateDescriptorQueue& update_descriptor_queue;
    vk::PipelineLayout layout;
    vk::DescriptorUpdateTemplateKHR descriptor_template;
    vk::ShaderModule shader_module;
    vk::Pipeline pipeline;
 };
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@ -1,484 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <array>
 #include <cstring>
 #include <vector>
 #include "common/common_types.h"
 #include "common/microprofile.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 namespace {
 /// Translates a stencil face description into a VkStencilOpState.
 /// Only the operations and the comparison function are taken from `face`; the compare mask,
 /// write mask and reference are left at zero.
 template <class StencilFace>
 VkStencilOpState GetStencilFaceState(const StencilFace& face) {
    VkStencilOpState stencil_state{};
    stencil_state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail());
    stencil_state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass());
    stencil_state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail());
    stencil_state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc());
    stencil_state.compareMask = 0;
    stencil_state.writeMask = 0;
    stencil_state.reference = 0;
    return stencil_state;
 }
 /// Returns false for the list and patch topologies enumerated below and true for every
 /// other topology.
 bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
    switch (topology) {
    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
        // Primitive restart is treated as unsupported for these topologies.
        return false;
    default:
        return true;
    }
 }
 /// Splits a packed 16-bit viewport swizzle into its four components and translates each one
 /// to Vulkan. Each component occupies 3 bits, starting at bits 0 (x), 4 (y), 8 (z), 12 (w).
 VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
    union Swizzle {
        u32 raw;
        BitField<0, 3, Maxwell::ViewportSwizzle> x;
        BitField<4, 3, Maxwell::ViewportSwizzle> y;
        BitField<8, 3, Maxwell::ViewportSwizzle> z;
        BitField<12, 3, Maxwell::ViewportSwizzle> w;
    };
    const Swizzle packed{swizzle};
    VkViewportSwizzleNV result{};
    result.x = MaxwellToVK::ViewportSwizzle(packed.x);
    result.y = MaxwellToVK::ViewportSwizzle(packed.y);
    result.z = MaxwellToVK::ViewportSwizzle(packed.z);
    result.w = MaxwellToVK::ViewportSwizzle(packed.w);
    return result;
 }
 /// Maps a guest MSAA mode to the Vulkan sample count flag with the same number of samples
 /// (1, 2, 4, 8 or 16). Unknown modes hit UNREACHABLE_MSG and fall back to a single sample.
 VkSampleCountFlagBits ConvertMsaaMode(Tegra::Texture::MsaaMode msaa_mode) {
    using Mode = Tegra::Texture::MsaaMode;
    switch (msaa_mode) {
    case Mode::Msaa1x1:
        return VK_SAMPLE_COUNT_1_BIT;
    case Mode::Msaa2x1:
    case Mode::Msaa2x1_D3D:
        return VK_SAMPLE_COUNT_2_BIT;
    case Mode::Msaa2x2:
    case Mode::Msaa2x2_VC4:
    case Mode::Msaa2x2_VC12:
        return VK_SAMPLE_COUNT_4_BIT;
    case Mode::Msaa4x2:
    case Mode::Msaa4x2_D3D:
    case Mode::Msaa4x2_VC8:
    case Mode::Msaa4x2_VC24:
        return VK_SAMPLE_COUNT_8_BIT;
    case Mode::Msaa4x4:
        return VK_SAMPLE_COUNT_16_BIT;
    default:
        break;
    }
    UNREACHABLE_MSG("Invalid msaa_mode={}", static_cast<int>(msaa_mode));
    return VK_SAMPLE_COUNT_1_BIT;
 }
 } // Anonymous namespace
 /// Builds a graphics pipeline and everything it depends on.
 /// The initializer list stores the device, scheduler, cache key and its hash, then creates the
 /// descriptor set layout, descriptor allocator, pipeline layout, descriptor update template,
 /// shader modules and finally the pipeline itself through the Create* helpers.
 /// NOTE(review): the later initializers read earlier members (e.g. *descriptor_set_layout,
 /// cache_key), so this presumably relies on the members being declared in the same order in
 /// the header — confirm before reordering either.
 VKGraphicsPipeline::VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
                                       VKDescriptorPool& descriptor_pool_,
                                       VKUpdateDescriptorQueue& update_descriptor_queue_,
                                       const GraphicsPipelineCacheKey& key,
                                       vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                       const SPIRVProgram& program, u32 num_color_buffers)
    : device{device_}, scheduler{scheduler_}, cache_key{key}, hash{cache_key.Hash()},
      descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
      descriptor_allocator{descriptor_pool_, *descriptor_set_layout},
      update_descriptor_queue{update_descriptor_queue_}, layout{CreatePipelineLayout()},
      descriptor_template{CreateDescriptorUpdateTemplate(program)},
      modules(CreateShaderModules(program)),
      pipeline(CreatePipeline(program, cache_key.renderpass, num_color_buffers)) {}
 /// Destructor, defaulted out of line in this source file.
 VKGraphicsPipeline::~VKGraphicsPipeline() = default;
 /// Commits a descriptor set from the allocator and sends it, together with the descriptor
 /// update template, to the update descriptor queue. When the pipeline has no template, an
 /// empty handle is returned and nothing is committed.
 VkDescriptorSet VKGraphicsPipeline::CommitDescriptorSet() {
    if (descriptor_template) {
        const VkDescriptorSet committed_set = descriptor_allocator.Commit();
        update_descriptor_queue.Send(*descriptor_template, committed_set);
        return committed_set;
    }
    return {};
 }
 /// Creates a descriptor set layout from the caller-provided bindings, passed through as-is.
 vk::DescriptorSetLayout VKGraphicsPipeline::CreateDescriptorSetLayout(
    vk::Span<VkDescriptorSetLayoutBinding> bindings) const {
    const auto& logical = device.GetLogical();
    return logical.CreateDescriptorSetLayout({
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .bindingCount = bindings.size(),
        .pBindings = bindings.data(),
    });
 }
 /// Creates a pipeline layout that references this pipeline's single descriptor set layout
 /// and declares no push constant ranges.
 vk::PipelineLayout VKGraphicsPipeline::CreatePipelineLayout() const {
    const auto& logical = device.GetLogical();
    return logical.CreatePipelineLayout({
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .setLayoutCount = 1,
        .pSetLayouts = descriptor_set_layout.address(),
        .pushConstantRangeCount = 0,
        .pPushConstantRanges = nullptr,
    });
 }
 /// Creates one descriptor update template covering every present stage of the program.
 /// Binding and offset counters are shared across stages so the entries of consecutive stages
 /// continue where the previous stage stopped. Returns an empty handle when no stage
 /// contributes any entry.
 vk::DescriptorUpdateTemplateKHR VKGraphicsPipeline::CreateDescriptorUpdateTemplate(
    const SPIRVProgram& program) const {
    std::vector<VkDescriptorUpdateTemplateEntry> update_entries;
    u32 next_binding = 0;
    u32 next_offset = 0;
    for (const auto& stage : program) {
        if (!stage) {
            continue;
        }
        FillDescriptorUpdateTemplateEntries(stage->entries, next_binding, next_offset,
                                            update_entries);
    }
    if (update_entries.empty()) {
        // If the shader doesn't use descriptor sets, skip template creation.
        return {};
    }
    return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR,
        .pNext = nullptr,
        .flags = 0,
        .descriptorUpdateEntryCount = static_cast<u32>(update_entries.size()),
        .pDescriptorUpdateEntries = update_entries.data(),
        .templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET_KHR,
        .descriptorSetLayout = *descriptor_set_layout,
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .pipelineLayout = *layout,
        .set = DESCRIPTOR_SET,
    });
 }
 std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
    const SPIRVProgram& program) const {
    VkShaderModuleCreateInfo ci{
        .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .codeSize = 0,
        .pCode = nullptr,
    };
    std::vector<vk::ShaderModule> shader_modules;
    shader_modules.reserve(Maxwell::MaxShaderStage);
    for (std::size_t i = 0; i < Maxwell::MaxShaderStage; ++i) {
        const auto& stage = program[i];
        if (!stage) {
            continue;
        }
        device.SaveShader(stage->code);
        ci.codeSize = stage->code.size() * sizeof(u32);
        ci.pCode = stage->code.data();
        shader_modules.push_back(device.GetLogical().CreateShaderModule(ci));
    }
    return shader_modules;
 }
 /// Assembles every fixed-function state block and shader stage and creates the Vulkan
 /// graphics pipeline.
 /// @param program           Per-stage SPIR-V shaders; absent stages are skipped. Stage 0 is
 ///                          dereferenced unconditionally to read its input attributes.
 /// @param renderpass        Render pass the pipeline is created for (subpass 0).
 /// @param num_color_buffers Number of color blend attachments to describe; it indexes a
 ///                          fixed array of Maxwell::NumRenderTargets elements.
 /// @return The created pipeline.
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program,
                                                VkRenderPass renderpass,
                                                u32 num_color_buffers) const {
    const auto& state = cache_key.fixed_state;
    const auto& viewport_swizzles = state.viewport_swizzles;
    FixedPipelineState::DynamicState dynamic;
    if (device.IsExtExtendedDynamicStateSupported()) {
        // Insert dummy values, as long as they are valid they don't matter as extended dynamic
        // state is ignored
        dynamic.raw1 = 0;
        dynamic.raw2 = 0;
        dynamic.vertex_strides.fill(0);
    } else {
        dynamic = state.dynamic_state;
    }
    // Vertex input: one binding per vertex array, plus a divisor entry for instanced bindings.
    std::vector<VkVertexInputBindingDescription> vertex_bindings;
    std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
    for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
        const bool instanced = state.binding_divisors[index] != 0;
        const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
        vertex_bindings.push_back({
            .binding = static_cast<u32>(index),
            .stride = dynamic.vertex_strides[index],
            .inputRate = rate,
        });
        if (instanced) {
            vertex_binding_divisors.push_back({
                .binding = static_cast<u32>(index),
                .divisor = state.binding_divisors[index],
            });
        }
    }
    std::vector<VkVertexInputAttributeDescription> vertex_attributes;
    const auto& input_attributes = program[0]->entries.attributes;
    for (std::size_t index = 0; index < state.attributes.size(); ++index) {
        const auto& attribute = state.attributes[index];
        if (!attribute.enabled) {
            continue;
        }
        if (!input_attributes.contains(static_cast<u32>(index))) {
            // Skip attributes not used by the vertex shaders.
            continue;
        }
        vertex_attributes.push_back({
            .location = static_cast<u32>(index),
            .binding = attribute.buffer,
            .format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()),
            .offset = attribute.offset,
        });
    }
    VkPipelineVertexInputStateCreateInfo vertex_input_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .vertexBindingDescriptionCount = static_cast<u32>(vertex_bindings.size()),
        .pVertexBindingDescriptions = vertex_bindings.data(),
        .vertexAttributeDescriptionCount = static_cast<u32>(vertex_attributes.size()),
        .pVertexAttributeDescriptions = vertex_attributes.data(),
    };
    const VkPipelineVertexInputDivisorStateCreateInfoEXT input_divisor_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .vertexBindingDivisorCount = static_cast<u32>(vertex_binding_divisors.size()),
        .pVertexBindingDivisors = vertex_binding_divisors.data(),
    };
    // Only chain the divisor structure when at least one binding is instanced.
    if (!vertex_binding_divisors.empty()) {
        vertex_input_ci.pNext = &input_divisor_ci;
    }
    // Translate the topology once and use the same value for both the topology field and the
    // primitive restart check, so the two can never disagree.
    const auto input_assembly_topology = MaxwellToVK::PrimitiveTopology(device, state.topology);
    const VkPipelineInputAssemblyStateCreateInfo input_assembly_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .topology = input_assembly_topology,
        .primitiveRestartEnable = state.primitive_restart_enable != 0 &&
                                  SupportsPrimitiveRestart(input_assembly_topology),
    };
    const VkPipelineTessellationStateCreateInfo tessellation_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .patchControlPoints = state.patch_control_points_minus_one.Value() + 1,
    };
    // Viewports and scissors are dynamic state, so only their counts are provided here.
    VkPipelineViewportStateCreateInfo viewport_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .viewportCount = Maxwell::NumViewports,
        .pViewports = nullptr,
        .scissorCount = Maxwell::NumViewports,
        .pScissors = nullptr,
    };
    std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
    std::ranges::transform(viewport_swizzles, swizzles.begin(), UnpackViewportSwizzle);
    VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV,
        .pNext = nullptr,
        .flags = 0,
        .viewportCount = Maxwell::NumViewports,
        .pViewportSwizzles = swizzles.data(),
    };
    if (device.IsNvViewportSwizzleSupported()) {
        viewport_ci.pNext = &swizzle_ci;
    }
    const VkPipelineRasterizationStateCreateInfo rasterization_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .depthClampEnable =
            static_cast<VkBool32>(state.depth_clamp_disabled == 0 ? VK_TRUE : VK_FALSE),
        .rasterizerDiscardEnable =
            static_cast<VkBool32>(state.rasterize_enable == 0 ? VK_TRUE : VK_FALSE),
        .polygonMode = VK_POLYGON_MODE_FILL,
        .cullMode = static_cast<VkCullModeFlags>(
            dynamic.cull_enable ? MaxwellToVK::CullFace(dynamic.CullFace()) : VK_CULL_MODE_NONE),
        .frontFace = MaxwellToVK::FrontFace(dynamic.FrontFace()),
        .depthBiasEnable = state.depth_bias_enable,
        .depthBiasConstantFactor = 0.0f,
        .depthBiasClamp = 0.0f,
        .depthBiasSlopeFactor = 0.0f,
        .lineWidth = 1.0f,
    };
    const VkPipelineMultisampleStateCreateInfo multisample_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .rasterizationSamples = ConvertMsaaMode(state.msaa_mode),
        .sampleShadingEnable = VK_FALSE,
        .minSampleShading = 0.0f,
        .pSampleMask = nullptr,
        .alphaToCoverageEnable = VK_FALSE,
        .alphaToOneEnable = VK_FALSE,
    };
    const VkPipelineDepthStencilStateCreateInfo depth_stencil_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .depthTestEnable = dynamic.depth_test_enable,
        .depthWriteEnable = dynamic.depth_write_enable,
        .depthCompareOp = dynamic.depth_test_enable
                              ? MaxwellToVK::ComparisonOp(dynamic.DepthTestFunc())
                              : VK_COMPARE_OP_ALWAYS,
        .depthBoundsTestEnable = dynamic.depth_bounds_enable,
        .stencilTestEnable = dynamic.stencil_enable,
        .front = GetStencilFaceState(dynamic.front),
        .back = GetStencilFaceState(dynamic.back),
        .minDepthBounds = 0.0f,
        .maxDepthBounds = 0.0f,
    };
    // Only the first num_color_buffers elements are written and consumed.
    std::array<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
    for (std::size_t index = 0; index < num_color_buffers; ++index) {
        static constexpr std::array COMPONENT_TABLE{
            VK_COLOR_COMPONENT_R_BIT,
            VK_COLOR_COMPONENT_G_BIT,
            VK_COLOR_COMPONENT_B_BIT,
            VK_COLOR_COMPONENT_A_BIT,
        };
        const auto& blend = state.attachments[index];
        VkColorComponentFlags color_components = 0;
        for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) {
            if (blend.Mask()[i]) {
                color_components |= COMPONENT_TABLE[i];
            }
        }
        cb_attachments[index] = {
            .blendEnable = blend.enable != 0,
            .srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()),
            .dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()),
            .colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()),
            .srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()),
            .dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()),
            .alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()),
            .colorWriteMask = color_components,
        };
    }
    const VkPipelineColorBlendStateCreateInfo color_blend_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = num_color_buffers,
        .pAttachments = cb_attachments.data(),
        .blendConstants = {},
    };
    // Baseline dynamic states, extended below when the device supports the extension.
    std::vector dynamic_states{
        VK_DYNAMIC_STATE_VIEWPORT,           VK_DYNAMIC_STATE_SCISSOR,
        VK_DYNAMIC_STATE_DEPTH_BIAS,         VK_DYNAMIC_STATE_BLEND_CONSTANTS,
        VK_DYNAMIC_STATE_DEPTH_BOUNDS,       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
        VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, VK_DYNAMIC_STATE_STENCIL_REFERENCE,
    };
    if (device.IsExtExtendedDynamicStateSupported()) {
        static constexpr std::array extended{
            VK_DYNAMIC_STATE_CULL_MODE_EXT,
            VK_DYNAMIC_STATE_FRONT_FACE_EXT,
            VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT,
            VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT,
            VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT,
            VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT,
            VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT,
            VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT,
            VK_DYNAMIC_STATE_STENCIL_OP_EXT,
        };
        dynamic_states.insert(dynamic_states.end(), extended.begin(), extended.end());
    }
    const VkPipelineDynamicStateCreateInfo dynamic_state_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .dynamicStateCount = static_cast<u32>(dynamic_states.size()),
        .pDynamicStates = dynamic_states.data(),
    };
    const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
        .pNext = nullptr,
        .requiredSubgroupSize = GuestWarpSize,
    };
    // `modules` only holds entries for present stages, hence the separate module_index.
    std::vector<VkPipelineShaderStageCreateInfo> shader_stages;
    std::size_t module_index = 0;
    for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        if (!program[stage]) {
            continue;
        }
        VkPipelineShaderStageCreateInfo& stage_ci = shader_stages.emplace_back();
        stage_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
        stage_ci.pNext = nullptr;
        stage_ci.flags = 0;
        stage_ci.stage = MaxwellToVK::ShaderStage(static_cast<Tegra::Engines::ShaderType>(stage));
        stage_ci.module = *modules[module_index++];
        stage_ci.pName = "main";
        stage_ci.pSpecializationInfo = nullptr;
        if (program[stage]->entries.uses_warps && device.IsGuestWarpSizeSupported(stage_ci.stage)) {
            stage_ci.pNext = &subgroup_size_ci;
        }
    }
    return device.GetLogical().CreateGraphicsPipeline(VkGraphicsPipelineCreateInfo{
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .pNext = nullptr,
        .flags = 0,
        .stageCount = static_cast<u32>(shader_stages.size()),
        .pStages = shader_stages.data(),
        .pVertexInputState = &vertex_input_ci,
        .pInputAssemblyState = &input_assembly_ci,
        .pTessellationState = &tessellation_ci,
        .pViewportState = &viewport_ci,
        .pRasterizationState = &rasterization_ci,
        .pMultisampleState = &multisample_ci,
        .pDepthStencilState = &depth_stencil_ci,
        .pColorBlendState = &color_blend_ci,
        .pDynamicState = &dynamic_state_ci,
        .layout = *layout,
        .renderPass = renderpass,
        .subpass = 0,
        .basePipelineHandle = nullptr,
        .basePipelineIndex = 0,
    });
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.h
@ -1,103 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <array>
 #include <optional>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 namespace Vulkan {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 /// Key used to look up a graphics pipeline: the render pass handle, one GPU address per shader
 /// program slot and the fixed pipeline state.
 struct GraphicsPipelineCacheKey {
    VkRenderPass renderpass;
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
    FixedPipelineState fixed_state;

    /// Hash of the key; defined out of line.
    std::size_t Hash() const noexcept;

    /// Equality comparison; defined out of line.
    bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;

    /// Logical negation of operator==.
    bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
        const bool is_equal = operator==(rhs);
        return !is_equal;
    }

    /// Size in bytes of the two leading members plus whatever fixed_state.Size() reports.
    std::size_t Size() const noexcept {
        constexpr std::size_t leading_members_size = sizeof(renderpass) + sizeof(shaders);
        return leading_members_size + fixed_state.Size();
    }
 };
 // Compile-time requirements on the key type: no padding-dependent representation, trivially
 // copyable and trivially constructible.
 static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
 class Device;
 class VKDescriptorPool;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
 using SPIRVProgram = std::array<std::optional<SPIRVShader>, Maxwell::MaxShaderStage>;
 /// Owns the Vulkan objects declared below (descriptor set layout, pipeline layout, descriptor
 /// update template, shader modules and the pipeline itself) together with the cache key it was
 /// created from.
 class VKGraphicsPipeline final {
 public:
    /// @param key               Cache key this pipeline is stored under; returned by GetCacheKey().
    /// @param bindings          Descriptor set layout bindings handed to CreateDescriptorSetLayout.
    /// @param program           Per-stage SPIR-V shaders; stages may be empty optionals.
    /// @param num_color_buffers Forwarded to CreatePipeline.
    explicit VKGraphicsPipeline(const Device& device_, VKScheduler& scheduler_,
                                VKDescriptorPool& descriptor_pool,
                                VKUpdateDescriptorQueue& update_descriptor_queue_,
                                const GraphicsPipelineCacheKey& key,
                                vk::Span<VkDescriptorSetLayoutBinding> bindings,
                                const SPIRVProgram& program, u32 num_color_buffers);
    ~VKGraphicsPipeline();
    // NOTE(review): defined out of line; presumably allocates a descriptor set through
    // descriptor_allocator - confirm against the implementation.
    VkDescriptorSet CommitDescriptorSet();
    /// Returns the raw handle held by `pipeline`.
    VkPipeline GetHandle() const {
        return *pipeline;
    }
    /// Returns the raw handle held by `layout`.
    VkPipelineLayout GetLayout() const {
        return *layout;
    }
    /// Returns a copy of the key this pipeline was created with.
    GraphicsPipelineCacheKey GetCacheKey() const {
        return cache_key;
    }
 private:
    // Creation helpers; all are const and return the newly created wrapper object.
    vk::DescriptorSetLayout CreateDescriptorSetLayout(
        vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
    vk::PipelineLayout CreatePipelineLayout() const;
    vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
        const SPIRVProgram& program) const;
    std::vector<vk::ShaderModule> CreateShaderModules(const SPIRVProgram& program) const;
    vk::Pipeline CreatePipeline(const SPIRVProgram& program, VkRenderPass renderpass,
                                u32 num_color_buffers) const;
    // Members are initialized in declaration order, so the order below is significant for any
    // member whose initializer reads an earlier one.
    const Device& device;
    VKScheduler& scheduler;
    const GraphicsPipelineCacheKey cache_key;
    const u64 hash;
    vk::DescriptorSetLayout descriptor_set_layout;
    DescriptorAllocator descriptor_allocator;
    VKUpdateDescriptorQueue& update_descriptor_queue;
    vk::PipelineLayout layout;
    vk::DescriptorUpdateTemplateKHR descriptor_template;
    std::vector<vk::ShaderModule> modules;
    vk::Pipeline pipeline;
 };
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@ -19,49 +19,27 @@
 #include "video_core/renderer_vulkan/maxwell_to_vk.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader_cache.h"
 #include "video_core/shader_notify.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
 namespace Vulkan {
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 using Tegra::Engines::ShaderType;
 using VideoCommon::Shader::GetShaderAddress;
 using VideoCommon::Shader::GetShaderCode;
 using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::STAGE_MAIN_OFFSET;
 namespace {
-
+size_t StageFromProgram(size_t program) {
 // Short aliases for the Vulkan descriptor types used as template arguments in this file.
 constexpr VkDescriptorType UNIFORM_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
 constexpr VkDescriptorType STORAGE_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
 constexpr VkDescriptorType UNIFORM_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
 constexpr VkDescriptorType COMBINED_IMAGE_SAMPLER = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
 constexpr VkDescriptorType STORAGE_TEXEL_BUFFER = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
 constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
 // Settings passed to the ShaderIR of every Shader built in this file: full decompilation with
 // else derivation disabled.
 constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
    .depth = VideoCommon::Shader::CompileDepth::FullDecompile,
    .disable_else_derivation = true,
 };
 /// Converts a shader program index into a stage index: program 0 maps to stage 0 and every other
 /// program maps to `program - 1` (so programs 0 and 1 share stage 0).
 constexpr std::size_t GetStageFromProgram(std::size_t program) {
    if (program != 0) {
        return program - 1;
    }
    return 0;
 }
-constexpr ShaderType GetStageFromProgram(Maxwell::ShaderProgram program) {
+ShaderType StageFromProgram(Maxwell::ShaderProgram program) {
-    return static_cast<ShaderType>(GetStageFromProgram(static_cast<std::size_t>(program)));
+    return static_cast<ShaderType>(StageFromProgram(static_cast<size_t>(program)));
 }
 ShaderType GetShaderType(Maxwell::ShaderProgram program) {
@ -81,165 +59,35 @@ ShaderType GetShaderType(Maxwell::ShaderProgram program) {
        return ShaderType::Vertex;
    }
 }
 template <VkDescriptorType descriptor_type, class Container>
 void AddBindings(std::vector<VkDescriptorSetLayoutBinding>& bindings, u32& binding,
                 VkShaderStageFlags stage_flags, const Container& container) {
    const u32 num_entries = static_cast<u32>(std::size(container));
    for (std::size_t i = 0; i < num_entries; ++i) {
        u32 count = 1;
        if constexpr (descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
            // Combined image samplers can be arrayed.
            count = container[i].size;
        }
        bindings.push_back({
            .binding = binding++,
            .descriptorType = descriptor_type,
            .descriptorCount = count,
            .stageFlags = stage_flags,
            .pImmutableSamplers = nullptr,
        });
    }
 }
 /// Appends the layout bindings for all resources of one shader program to `bindings`.
 /// Bindings are numbered from `base_binding` in this fixed order: const buffers, global buffers,
 /// uniform texels, samplers, storage texels, images.
 /// @return The first binding index that was not used.
 u32 FillDescriptorLayout(const ShaderEntries& entries,
                         std::vector<VkDescriptorSetLayoutBinding>& bindings,
                         Maxwell::ShaderProgram program_type, u32 base_binding) {
    const ShaderType shader_stage = GetStageFromProgram(program_type);
    const VkShaderStageFlags stage_flags = MaxwellToVK::ShaderStage(shader_stage);

    u32 next_binding = base_binding;
    AddBindings<UNIFORM_BUFFER>(bindings, next_binding, stage_flags, entries.const_buffers);
    AddBindings<STORAGE_BUFFER>(bindings, next_binding, stage_flags, entries.global_buffers);
    AddBindings<UNIFORM_TEXEL_BUFFER>(bindings, next_binding, stage_flags, entries.uniform_texels);
    AddBindings<COMBINED_IMAGE_SAMPLER>(bindings, next_binding, stage_flags, entries.samplers);
    AddBindings<STORAGE_TEXEL_BUFFER>(bindings, next_binding, stage_flags, entries.storage_texels);
    AddBindings<STORAGE_IMAGE>(bindings, next_binding, stage_flags, entries.images);
    return next_binding;
 }
 } // Anonymous namespace
-std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
+size_t ComputePipelineCacheKey::Hash() const noexcept {
    // Hashes the first Size() bytes of the object's raw storage with CityHash64.
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
    return static_cast<std::size_t>(hash);
 }
 /// Two keys are equal when their first Size() bytes (Size() of the left-hand key) match bytewise.
 bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
    const std::size_t compared_bytes = Size();
    const int difference = std::memcmp(&rhs, this, compared_bytes);
    return difference == 0;
 }
 // Hashes the whole object (sizeof *this bytes of raw storage) with CityHash64.
 std::size_t ComputePipelineCacheKey::Hash() const noexcept {
    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
-    return static_cast<std::size_t>(hash);
+    return static_cast<size_t>(hash);
 }
 /// Bytewise comparison of the two keys over the full size of the object.
 bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept {
    const int difference = std::memcmp(&rhs, this, sizeof(*this));
    return difference == 0;
 }
-Shader::Shader(Tegra::Engines::ConstBufferEngineInterface& engine_, ShaderType stage_,
+Shader::Shader() = default;
               GPUVAddr gpu_addr_, VAddr cpu_addr_, ProgramCode program_code_, u32 main_offset_)
    // The initializers run in this order: program code is moved in first, the registry is built
    // from (stage_, engine_), the IR is then built from the stored `program_code` member using
    // the file-level compiler_settings, and the entries are generated from that IR.
    // `cpu_addr_` is not used by any initializer shown here.
    : gpu_addr(gpu_addr_), program_code(std::move(program_code_)), registry(stage_, engine_),
      shader_ir(program_code, main_offset_, compiler_settings, registry),
      entries(GenerateShaderEntries(shader_ir)) {}
 // Defaulted out of line.
 Shader::~Shader() = default;
-VKPipelineCache::VKPipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
+PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
-                                 Tegra::Engines::Maxwell3D& maxwell3d_,
+                             Tegra::Engines::Maxwell3D& maxwell3d_,
-                                 Tegra::Engines::KeplerCompute& kepler_compute_,
+                             Tegra::Engines::KeplerCompute& kepler_compute_,
-                                 Tegra::MemoryManager& gpu_memory_, const Device& device_,
+                             Tegra::MemoryManager& gpu_memory_, const Device& device_,
-                                 VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
+                             VKScheduler& scheduler_, VKDescriptorPool& descriptor_pool_,
-                                 VKUpdateDescriptorQueue& update_descriptor_queue_)
+                             VKUpdateDescriptorQueue& update_descriptor_queue_)
    // Hands the rasterizer to the ShaderCache base and stores every other argument in the
    // member of the same name; the constructor body does no further work.
    : VideoCommon::ShaderCache<Shader>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
      scheduler{scheduler_}, descriptor_pool{descriptor_pool_}, update_descriptor_queue{
                                                                    update_descriptor_queue_} {}
-VKPipelineCache::~VKPipelineCache() = default;
+PipelineCache::~PipelineCache() = default;
 // Destructor is defaulted out of line.
-std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
+ComputePipeline& PipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
    // Collects one Shader per enabled shader program slot, creating and registering shaders
    // that are not cached yet. Slots of disabled programs stay nullptr. The result is also
    // stored in last_shaders.
    std::array<Shader*, Maxwell::MaxShaderProgram> shaders{};
    for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
        const auto program{static_cast<Maxwell::ShaderProgram>(index)};
        // Skip stages that are not enabled
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
            continue;
        }
        const GPUVAddr gpu_addr{GetShaderAddress(maxwell3d, program)};
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        ASSERT(cpu_addr);
        Shader* result = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
        if (!result) {
            const u8* const host_ptr{gpu_memory.GetPointer(gpu_addr)};
            // No shader found - create a new one
            static constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
            const auto stage = static_cast<ShaderType>(index == 0 ? 0 : index - 1);
            ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, false);
            const std::size_t size_in_bytes = code.size() * sizeof(u64);
            // The unmapped case is handled below through null_shader, so the optional must not
            // be dereferenced unconditionally here: reading an empty optional is undefined
            // behavior. Fall back to address 0 when there is no CPU mapping.
            const VAddr shader_cpu_addr = cpu_addr ? *cpu_addr : 0;
            auto shader = std::make_unique<Shader>(maxwell3d, stage, gpu_addr, shader_cpu_addr,
                                                   std::move(code), stage_offset);
            result = shader.get();
            if (cpu_addr) {
                Register(std::move(shader), *cpu_addr, size_in_bytes);
            } else {
                // Unmapped address: keep the shader alive in the dedicated null slot.
                null_shader = std::move(shader);
            }
        }
        shaders[index] = result;
    }
    return last_shaders = shaders;
 }
 // Returns the graphics pipeline for `key`, building it on a cache miss.
 // The most recently requested key/pipeline pair is remembered to skip the map lookup when the
 // same key is requested again. The result can be nullptr on the asynchronous path while the
 // pipeline has not been stored into the cache entry yet.
 VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
    const GraphicsPipelineCacheKey& key, u32 num_color_buffers,
    VideoCommon::Shader::AsyncShaders& async_shaders) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
    // Fast path: same key as the previous call and a pipeline is already known for it.
    if (last_graphics_pipeline && last_graphics_key == key) {
        return last_graphics_pipeline;
    }
    last_graphics_key = key;
    if (device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(gpu)) {
        // Asynchronous path: the map is accessed under the pipeline_cache mutex.
        std::unique_lock lock{pipeline_cache};
        const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
        if (is_cache_miss) {
            gpu.ShaderNotify().MarkSharderBuilding();
            LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
            const auto [program, bindings] = DecompileShaders(key.fixed_state);
            // The build is queued; the entry created by try_emplace holds an empty pointer
            // until something assigns it (see EmplacePipeline).
            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
                                            update_descriptor_queue, bindings, program, key,
                                            num_color_buffers);
        }
        last_graphics_pipeline = pair->second.get();
        return last_graphics_pipeline;
    }
    // Synchronous path: build the pipeline in place on a miss.
    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        gpu.ShaderNotify().MarkSharderBuilding();
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
        const auto [program, bindings] = DecompileShaders(key.fixed_state);
        entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                     update_descriptor_queue, key, bindings,
                                                     program, num_color_buffers);
        gpu.ShaderNotify().MarkShaderComplete();
    }
    last_graphics_pipeline = entry.get();
    return last_graphics_pipeline;
 }
 // Returns the compute pipeline for `key`, compiling the kernel and creating the pipeline when
 // the cache entry is new. Part of this function is hidden by the hunk boundary below.
 VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);
    const auto [pair, is_cache_miss] = compute_cache.try_emplace(key);
@ -248,200 +96,9 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
        return *entry;
    }
    LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
-
+    throw "Bad";
    const GPUVAddr gpu_addr = key.shader;
    const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
    ASSERT(cpu_addr);
    // Unmapped addresses use the dedicated null_kernel slot instead of the shader cache.
    Shader* shader = cpu_addr ? TryGet(*cpu_addr) : null_kernel.get();
    if (!shader) {
        // No shader found - create a new one
        const auto host_ptr = gpu_memory.GetPointer(gpu_addr);
        ProgramCode code = GetShaderCode(gpu_memory, gpu_addr, host_ptr, true);
        const std::size_t size_in_bytes = code.size() * sizeof(u64);
        // NOTE(review): *cpu_addr is dereferenced here even when cpu_addr is empty (the else
        // branch below handles that case), which is undefined behavior for an empty optional.
        auto shader_info = std::make_unique<Shader>(kepler_compute, ShaderType::Compute, gpu_addr,
                                                    *cpu_addr, std::move(code), KERNEL_MAIN_OFFSET);
        shader = shader_info.get();
        if (cpu_addr) {
            Register(std::move(shader_info), *cpu_addr, size_in_bytes);
        } else {
            null_kernel = std::move(shader_info);
        }
    }
    // Only the workgroup and shared memory sizes come from the key; the remaining fields are
    // set to fixed values.
    const Specialization specialization{
        .base_binding = 0,
        .workgroup_size = key.workgroup_size,
        .shared_memory_size = key.shared_memory_size,
        .point_size = std::nullopt,
        .enabled_attributes = {},
        .attribute_types = {},
        .ndc_minus_one_to_one = false,
    };
    const SPIRVShader spirv_shader{Decompile(device, shader->GetIR(), ShaderType::Compute,
                                             shader->GetRegistry(), specialization),
                                   shader->GetEntries()};
    entry = std::make_unique<VKComputePipeline>(device, scheduler, descriptor_pool,
                                                update_descriptor_queue, spirv_shader);
    return *entry;
 }
-void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
+void PipelineCache::OnShaderRemoval(Shader*) {}
    // Signals completion, then stores the finished pipeline into its existing cache entry
    // while holding the pipeline_cache mutex. `at` requires the key to be present already.
    gpu.ShaderNotify().MarkShaderComplete();
    std::unique_lock lock{pipeline_cache};
    graphics_cache.at(pipeline->GetCacheKey()) = std::move(pipeline);
 }
 /// Drops every cached graphics and compute pipeline that references the removed shader's GPU
 /// address. The scheduler is finished once, right before the first pipeline is erased, and not
 /// at all when no pipeline references the shader.
 void VKPipelineCache::OnShaderRemoval(Shader* shader) {
    bool scheduler_finished = false;
    const auto finish_once = [this, &scheduler_finished] {
        // TODO(Rodrigo): Instead of finishing here, wait for the fences that use this pipeline and
        // flush.
        if (!scheduler_finished) {
            scheduler_finished = true;
            scheduler.Finish();
        }
    };

    const GPUVAddr removed_addr = shader->GetGpuAddr();

    auto graphics_it = graphics_cache.begin();
    while (graphics_it != graphics_cache.end()) {
        const auto& key = graphics_it->first;
        const bool references_shader =
            std::find(key.shaders.begin(), key.shaders.end(), removed_addr) != key.shaders.end();
        if (references_shader) {
            finish_once();
            graphics_it = graphics_cache.erase(graphics_it);
        } else {
            ++graphics_it;
        }
    }

    auto compute_it = compute_cache.begin();
    while (compute_it != compute_cache.end()) {
        const auto& key = compute_it->first;
        if (key.shader == removed_addr) {
            finish_once();
            compute_it = compute_cache.erase(compute_it);
        } else {
            ++compute_it;
        }
    }
 }
 // Decompiles every enabled shader program (slot 0 is skipped) with a specialization derived
 // from `fixed_state`, and collects the descriptor set layout bindings of all stages.
 // Returns the per-stage SPIR-V program together with those bindings.
 std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
 VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
    Specialization specialization;
    if (fixed_state.topology == Maxwell::PrimitiveTopology::Points) {
        // point_size is stored as raw bits in the fixed state; copy them into a float.
        float point_size;
        std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
        specialization.point_size = point_size;
        ASSERT(point_size != 0.0f);
    }
    for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) {
        const auto& attribute = fixed_state.attributes[i];
        specialization.enabled_attributes[i] = attribute.enabled.Value() != 0;
        specialization.attribute_types[i] = attribute.Type();
    }
    specialization.ndc_minus_one_to_one = fixed_state.ndc_minus_one_to_one;
    specialization.early_fragment_tests = fixed_state.early_z;
    // Alpha test
    specialization.alpha_test_func =
        FixedPipelineState::UnpackComparisonOp(fixed_state.alpha_test_func.Value());
    specialization.alpha_test_ref = Common::BitCast<float>(fixed_state.alpha_test_ref);
    SPIRVProgram program;
    std::vector<VkDescriptorSetLayoutBinding> bindings;
    for (std::size_t index = 1; index < Maxwell::MaxShaderProgram; ++index) {
        const auto program_enum = static_cast<Maxwell::ShaderProgram>(index);
        // Skip stages that are not enabled
        if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
            continue;
        }
        const GPUVAddr gpu_addr = GetShaderAddress(maxwell3d, program_enum);
        const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
        // NOTE(review): `shader` is dereferenced below without a null check; this presumably
        // relies on GetShaders() having populated the cache beforehand - confirm with callers.
        Shader* const shader = cpu_addr ? TryGet(*cpu_addr) : null_shader.get();
        // The loop starts at 1, so the `index == 0` arm is never taken here.
        const std::size_t stage = index == 0 ? 0 : index - 1; // Stage indices are 0 - 5
        const ShaderType program_type = GetShaderType(program_enum);
        const auto& entries = shader->GetEntries();
        // Decompile with the current base_binding; it is advanced only afterwards so each stage
        // starts numbering where the previous one ended.
        program[stage] = {
            Decompile(device, shader->GetIR(), program_type, shader->GetRegistry(), specialization),
            entries,
        };
        const u32 old_binding = specialization.base_binding;
        specialization.base_binding =
            FillDescriptorLayout(entries, bindings, program_enum, specialization.base_binding);
        // The layout must have consumed exactly as many bindings as the entries report.
        ASSERT(old_binding + entries.NumBindings() == specialization.base_binding);
    }
    return {std::move(program), std::move(bindings)};
 }
 // Appends descriptor update template entries for every element of `container`.
 // `binding` is the first destination binding and `offset` the byte offset of the first
 // descriptor payload; both are advanced past what was consumed. Each payload slot is
 // sizeof(DescriptorUpdateEntry) bytes wide.
 template <VkDescriptorType descriptor_type, class Container>
 void AddEntry(std::vector<VkDescriptorUpdateTemplateEntry>& template_entries, u32& binding,
              u32& offset, const Container& container) {
    static constexpr u32 entry_size = static_cast<u32>(sizeof(DescriptorUpdateEntry));
    const u32 count = static_cast<u32>(std::size(container));
    if constexpr (descriptor_type == COMBINED_IMAGE_SAMPLER) {
        // Samplers: one entry and one binding per element, each covering `size` descriptors.
        for (u32 i = 0; i < count; ++i) {
            const u32 num_samplers = container[i].size;
            template_entries.push_back({
                .dstBinding = binding,
                .dstArrayElement = 0,
                .descriptorCount = num_samplers,
                .descriptorType = descriptor_type,
                .offset = offset,
                .stride = entry_size,
            });
            ++binding;
            offset += num_samplers * entry_size;
        }
        return;
    }
    if constexpr (descriptor_type == UNIFORM_TEXEL_BUFFER ||
                  descriptor_type == STORAGE_TEXEL_BUFFER) {
        // Nvidia has a bug where updating multiple texels at once causes the driver to crash.
        // Note: Fixed in driver Windows 443.24, Linux 440.66.15
        // Hence one single-descriptor entry per texel buffer.
        for (u32 i = 0; i < count; ++i) {
            template_entries.push_back({
                .dstBinding = binding + i,
                .dstArrayElement = 0,
                .descriptorCount = 1,
                .descriptorType = descriptor_type,
                .offset = static_cast<std::size_t>(offset + i * entry_size),
                .stride = entry_size,
            });
        }
    } else if (count > 0) {
        // All other types: a single entry covering `count` descriptors starting at `binding`.
        template_entries.push_back({
            .dstBinding = binding,
            .dstArrayElement = 0,
            .descriptorCount = count,
            .descriptorType = descriptor_type,
            .offset = offset,
            .stride = entry_size,
        });
    }
    // Shared by the texel-buffer and generic paths (the sampler path returned above).
    offset += count * entry_size;
    binding += count;
 }
 /// Appends the descriptor update template entries for all resources in `entries`, in the order
 /// const buffers, global buffers, uniform texels, samplers, storage texels, images.
 /// @param binding          First destination binding; advanced past the bindings consumed.
 /// @param offset           Byte offset of the first descriptor payload; advanced past the
 ///                         payload bytes consumed.
 /// @param template_entries Output list the new entries are appended to.
 void FillDescriptorUpdateTemplateEntries(
    const ShaderEntries& entries, u32& binding, u32& offset,
    std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries) {
    // AddEntry takes (template_entries, binding, offset, container). The two counters used to
    // be passed in swapped order, which only produced correct entries when both started at the
    // same value and left the caller's variables holding each other's results.
    AddEntry<UNIFORM_BUFFER>(template_entries, binding, offset, entries.const_buffers);
    AddEntry<STORAGE_BUFFER>(template_entries, binding, offset, entries.global_buffers);
    AddEntry<UNIFORM_TEXEL_BUFFER>(template_entries, binding, offset, entries.uniform_texels);
    AddEntry<COMBINED_IMAGE_SAMPLER>(template_entries, binding, offset, entries.samplers);
    AddEntry<STORAGE_TEXEL_BUFFER>(template_entries, binding, offset, entries.storage_texels);
    AddEntry<STORAGE_IMAGE>(template_entries, binding, offset, entries.images);
 }
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@ -15,15 +15,8 @@
 #include <boost/functional/hash.hpp>
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_vulkan/fixed_pipeline_state.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/shader_cache.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@ -35,7 +28,7 @@ namespace Vulkan {
 class Device;
 class RasterizerVulkan;
-class VKComputePipeline;
+class ComputePipeline;
 class VKDescriptorPool;
 class VKScheduler;
 class VKUpdateDescriptorQueue;
@ -47,7 +40,7 @@ struct ComputePipelineCacheKey {
    u32 shared_memory_size;
    std::array<u32, 3> workgroup_size;
-    std::size_t Hash() const noexcept;
+    size_t Hash() const noexcept;
    bool operator==(const ComputePipelineCacheKey& rhs) const noexcept;
@ -63,16 +56,9 @@ static_assert(std::is_trivially_constructible_v<ComputePipelineCacheKey>);
 namespace std {
 template <>
 struct hash<Vulkan::GraphicsPipelineCacheKey> {
    std::size_t operator()(const Vulkan::GraphicsPipelineCacheKey& k) const noexcept {
        return k.Hash();
    }
 };
 // std::hash specialization that forwards to ComputePipelineCacheKey::Hash().
 template <>
 struct hash<Vulkan::ComputePipelineCacheKey> {
-    std::size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
+    size_t operator()(const Vulkan::ComputePipelineCacheKey& k) const noexcept {
        return k.Hash();
    }
 };
@ -83,66 +69,26 @@ namespace Vulkan {
 // Cached guest shader: holds the program code together with the registry, the IR built from it
 // and the entries generated from that IR.
 class Shader {
 public:
-    explicit Shader(Tegra::Engines::ConstBufferEngineInterface& engine_,
+    explicit Shader();
                    Tegra::Engines::ShaderType stage_, GPUVAddr gpu_addr, VAddr cpu_addr_,
                    VideoCommon::Shader::ProgramCode program_code, u32 main_offset_);
    ~Shader();
    // GPU virtual address the shader was created for.
    GPUVAddr GetGpuAddr() const {
        return gpu_addr;
    }
    // Mutable and read-only access to the shader's IR.
    VideoCommon::Shader::ShaderIR& GetIR() {
        return shader_ir;
    }
    const VideoCommon::Shader::ShaderIR& GetIR() const {
        return shader_ir;
    }
    const VideoCommon::Shader::Registry& GetRegistry() const {
        return registry;
    }
    const ShaderEntries& GetEntries() const {
        return entries;
    }
 private:
    // Declaration order matters: members are initialized top to bottom and later members are
    // constructed from earlier ones (shader_ir from program_code and registry, entries from
    // shader_ir).
    GPUVAddr gpu_addr{};
    VideoCommon::Shader::ProgramCode program_code;
    VideoCommon::Shader::Registry registry;
    VideoCommon::Shader::ShaderIR shader_ir;
    ShaderEntries entries;
 };
-class VKPipelineCache final : public VideoCommon::ShaderCache<Shader> {
+class PipelineCache final : public VideoCommon::ShaderCache<Shader> {
 public:
-    explicit VKPipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
+    explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
-                             Tegra::Engines::Maxwell3D& maxwell3d,
+                           Tegra::Engines::Maxwell3D& maxwell3d,
-                             Tegra::Engines::KeplerCompute& kepler_compute,
+                           Tegra::Engines::KeplerCompute& kepler_compute,
-                             Tegra::MemoryManager& gpu_memory, const Device& device,
+                           Tegra::MemoryManager& gpu_memory, const Device& device,
-                             VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
+                           VKScheduler& scheduler, VKDescriptorPool& descriptor_pool,
-                             VKUpdateDescriptorQueue& update_descriptor_queue);
+                           VKUpdateDescriptorQueue& update_descriptor_queue);
-    ~VKPipelineCache() override;
+    ~PipelineCache() override;
-    std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
+    ComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
    // Returns the graphics pipeline cached for `key`; may be nullptr while an asynchronous
    // build is still pending.
    VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
                                            u32 num_color_buffers,
                                            VideoCommon::Shader::AsyncShaders& async_shaders);
    VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
    // Stores a finished pipeline into its already existing cache entry.
    void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
 protected:
    // Erases the cached pipelines that reference the removed shader.
    void OnShaderRemoval(Shader* shader) final;
 private:
    // Decompiles the enabled shader programs and collects their descriptor layout bindings.
    std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
        const FixedPipelineState& fixed_state);
    Tegra::GPU& gpu;
    Tegra::Engines::Maxwell3D& maxwell3d;
    Tegra::Engines::KeplerCompute& kepler_compute;
@ -158,17 +104,8 @@ private:
    std::array<Shader*, Maxwell::MaxShaderProgram> last_shaders{};
    // Key and pipeline of the most recent GetGraphicsPipeline request.
    GraphicsPipelineCacheKey last_graphics_key;
    VKGraphicsPipeline* last_graphics_pipeline = nullptr;
    // Mutex taken around graphics_cache accesses on the asynchronous path.
    std::mutex pipeline_cache;
-    std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
+    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
        graphics_cache;
    std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;
 };
 // Appends descriptor update template entries for every resource in `entries`; `binding` and
 // `offset` are in/out counters that are advanced as entries are added.
 void FillDescriptorUpdateTemplateEntries(
    const ShaderEntries& entries, u32& binding, u32& offset,
    std::vector<VkDescriptorUpdateTemplateEntryKHR>& template_entries);
 } // namespace Vulkan
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@ -24,7 +24,6 @@
 #include "video_core/renderer_vulkan/vk_buffer_cache.h"
 #include "video_core/renderer_vulkan/vk_compute_pipeline.h"
 #include "video_core/renderer_vulkan/vk_descriptor_pool.h"
 #include "video_core/renderer_vulkan/vk_graphics_pipeline.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_rasterizer.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
@ -97,15 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
    return scissor;
 }
 /// Builds the per-program GPU address list for a set of shaders; slots without a shader get
 /// address 0.
 std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses(
    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders) {
    std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses;
    size_t slot = 0;
    for (const Shader* const shader : shaders) {
        // Every slot is written exactly once, so the array needs no prior initialization.
        addresses[slot] = shader ? shader->GetGpuAddr() : 0;
        ++slot;
    }
    return addresses;
 }
 struct TextureHandle {
    constexpr TextureHandle(u32 data, bool via_header_index) {
        const Tegra::Texture::TextureHandle handle{data};
@ -117,98 +107,6 @@ struct TextureHandle {
    u32 sampler;
 };
 // Reads the texture handle for `entry` from the engine's const buffers and wraps it in a
 // TextureHandle. Three cases are handled, in this order:
 //  - separated sampler entries: two 32-bit words are read and OR-ed together,
 //  - bindless entries: one word is read from the entry's own buffer/offset,
 //  - everything else: one word is read from the engine's bound buffer at word index
 //    `entry.offset + index`.
 template <typename Engine, typename Entry>
 TextureHandle GetTextureInfo(const Engine& engine, bool via_header_index, const Entry& entry,
                             size_t stage, size_t index = 0) {
    const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
    // Only sampler entries have the `is_separated` fields, hence the compile-time check.
    if constexpr (std::is_same_v<Entry, SamplerEntry>) {
        if (entry.is_separated) {
            const u32 buffer_1 = entry.buffer;
            const u32 buffer_2 = entry.secondary_buffer;
            const u32 offset_1 = entry.offset;
            const u32 offset_2 = entry.secondary_offset;
            const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
            const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
            return TextureHandle(handle_1 | handle_2, via_header_index);
        }
    }
    if (entry.is_bindless) {
        const u32 raw = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
        return TextureHandle(raw, via_header_index);
    }
    const u32 buffer = engine.GetBoundBuffer();
    // entry.offset counts 32-bit words here; convert to a byte offset.
    const u64 offset = (entry.offset + index) * sizeof(u32);
    return TextureHandle(engine.AccessConstBuffer32(shader_type, buffer, offset), via_header_index);
 }
 /// Selects the image view type for a sampler entry.
 /// Buffer entries always map to e2D; otherwise the texture type picks the view type, using the
 /// array variant when the entry is arrayed (3D textures have no array variant here).
 /// Texture types not listed hit UNREACHABLE() and fall back to e2D.
 ImageViewType ImageViewTypeFromEntry(const SamplerEntry& entry) {
    using TextureType = Tegra::Shader::TextureType;
    if (entry.is_buffer) {
        return ImageViewType::e2D;
    }
    switch (entry.type) {
    case TextureType::Texture1D:
        if (entry.is_array) {
            return ImageViewType::e1DArray;
        }
        return ImageViewType::e1D;
    case TextureType::Texture2D:
        if (entry.is_array) {
            return ImageViewType::e2DArray;
        }
        return ImageViewType::e2D;
    case TextureType::Texture3D:
        return ImageViewType::e3D;
    case TextureType::TextureCube:
        if (entry.is_array) {
            return ImageViewType::CubeArray;
        }
        return ImageViewType::Cube;
    }
    UNREACHABLE();
    return ImageViewType::e2D;
 }
 /// Selects the image view type for an image entry by mapping its image type one-to-one.
 /// Image types not listed hit UNREACHABLE() and fall back to e2D.
 ImageViewType ImageViewTypeFromEntry(const ImageEntry& entry) {
    using ImageType = Tegra::Shader::ImageType;
    const auto image_type = entry.type;
    if (image_type == ImageType::Texture1D) {
        return ImageViewType::e1D;
    }
    if (image_type == ImageType::Texture1DArray) {
        return ImageViewType::e1DArray;
    }
    if (image_type == ImageType::Texture2D) {
        return ImageViewType::e2D;
    }
    if (image_type == ImageType::Texture2DArray) {
        return ImageViewType::e2DArray;
    }
    if (image_type == ImageType::Texture3D) {
        return ImageViewType::e3D;
    }
    if (image_type == ImageType::TextureBuffer) {
        return ImageViewType::Buffer;
    }
    UNREACHABLE();
    return ImageViewType::e2D;
 }
 void PushImageDescriptors(const ShaderEntries& entries, TextureCache& texture_cache,
                          VKUpdateDescriptorQueue& update_descriptor_queue,
                          ImageViewId*& image_view_id_ptr, VkSampler*& sampler_ptr) {
    for ([[maybe_unused]] const auto& entry : entries.uniform_texels) {
        const ImageViewId image_view_id = *image_view_id_ptr++;
        const ImageView& image_view = texture_cache.GetImageView(image_view_id);
        update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
    }
    for (const auto& entry : entries.samplers) {
        for (size_t i = 0; i < entry.size; ++i) {
            const VkSampler sampler = *sampler_ptr++;
            const ImageViewId image_view_id = *image_view_id_ptr++;
            const ImageView& image_view = texture_cache.GetImageView(image_view_id);
            const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
            update_descriptor_queue.AddSampledImage(handle, sampler);
        }
    }
    for ([[maybe_unused]] const auto& entry : entries.storage_texels) {
        const ImageViewId image_view_id = *image_view_id_ptr++;
        const ImageView& image_view = texture_cache.GetImageView(image_view_id);
        update_descriptor_queue.AddTexelBuffer(image_view.BufferView());
    }
    for (const auto& entry : entries.images) {
        // TODO: Mark as modified
        const ImageViewId image_view_id = *image_view_id_ptr++;
        const ImageView& image_view = texture_cache.GetImageView(image_view_id);
        const VkImageView handle = image_view.Handle(ImageViewTypeFromEntry(entry));
        update_descriptor_queue.AddImage(handle);
    }
 }
 DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
                          bool is_indexed) {
    DrawParams params{
@ -253,71 +151,14 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
                     descriptor_pool, update_descriptor_queue),
      query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
      fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
-      wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) {
+      wfi_event(device.GetLogical().CreateEvent()) {
    scheduler.SetQueryCache(query_cache);
    if (device.UseAsynchronousShaders()) {
        async_shaders.AllocateWorkers();
    }
 }
 // Defaulted destructor: no teardown beyond destroying the members.
 RasterizerVulkan::~RasterizerVulkan() = default;
 // Graphics draw entry point.
 // NOTE(review): this is patch text. The '-'/'+' pair below replaces MICROPROFILE_SCOPE with
 // UNREACHABLE_MSG; the unmarked statements after it are presumably lines of the removed
 // implementation whose '-' marker was lost — confirm against the applied file.
 void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
-    MICROPROFILE_SCOPE(Vulkan_Drawing);
+    UNREACHABLE_MSG("Rendering not implemented {} {}", is_indexed, is_instanced);
    // gpu.TickWork() is registered through SCOPE_EXIT, before any of the work below.
    SCOPE_EXIT({ gpu.TickWork(); });
    FlushWork();
    query_cache.UpdateCounters();
    graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported());
    // Both cache mutexes are held from here until the end of the function.
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    texture_cache.SynchronizeGraphicsDescriptors();
    texture_cache.UpdateRenderTargets(false);
    const auto shaders = pipeline_cache.GetShaders();
    graphics_key.shaders = GetShaderAddresses(shaders);
    SetupShaderDescriptors(shaders, is_indexed);
    const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
    graphics_key.renderpass = framebuffer->RenderPass();
    VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline(
        graphics_key, framebuffer->NumColorBuffers(), async_shaders);
    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
        // Async graphics pipeline was not ready.
        return;
    }
    BeginTransformFeedback();
    scheduler.RequestRenderpass(framebuffer);
    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
    UpdateDynamicStates();
    const auto& regs = maxwell3d.regs;
    const u32 num_instances = maxwell3d.mme_draw.instance_count;
    const DrawParams draw_params = MakeDrawParams(regs, num_instances, is_instanced, is_indexed);
    const VkPipelineLayout pipeline_layout = pipeline->GetLayout();
    const VkDescriptorSet descriptor_set = pipeline->CommitDescriptorSet();
    // The recorded lambda captures the layout, descriptor set and draw parameters by value.
    scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
        // Descriptor sets are only bound when the committed set handle is non-null.
        if (descriptor_set) {
            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
                                      DESCRIPTOR_SET, descriptor_set, nullptr);
        }
        if (draw_params.is_indexed) {
            cmdbuf.DrawIndexed(draw_params.num_vertices, draw_params.num_instances, 0,
                               draw_params.base_vertex, draw_params.base_instance);
        } else {
            cmdbuf.Draw(draw_params.num_vertices, draw_params.num_instances,
                        draw_params.base_vertex, draw_params.base_instance);
        }
    });
    EndTransformFeedback();
 }
 void RasterizerVulkan::Clear() {
@ -395,73 +236,8 @@ void RasterizerVulkan::Clear() {
    });
 }
-void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
+void RasterizerVulkan::DispatchCompute() {
-    MICROPROFILE_SCOPE(Vulkan_Compute);
+    UNREACHABLE_MSG("Not implemented");
    // NOTE(review): this is patch text. The statements below still use `code_addr`, which only
    // the '-' signature declares, so they are presumably removed lines whose '-' marker was
    // lost — confirm against the applied file.
    query_cache.UpdateCounters();
    const auto& launch_desc = kepler_compute.launch_description;
    // The compute pipeline is looked up by shader address, shared memory size and block size.
    auto& pipeline = pipeline_cache.GetComputePipeline({
        .shader = code_addr,
        .shared_memory_size = launch_desc.shared_alloc,
        .workgroup_size{
            launch_desc.block_dim_x,
            launch_desc.block_dim_y,
            launch_desc.block_dim_z,
        },
    });
    // Compute dispatches can't be executed inside a renderpass
    scheduler.RequestOutsideRenderPassOperationContext();
    image_view_indices.clear();
    sampler_handles.clear();
    // Both cache mutexes are held from here until the end of the function.
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
    const auto& entries = pipeline.GetEntries();
    buffer_cache.SetEnabledComputeUniformBuffers(entries.enabled_uniform_buffers);
    buffer_cache.UnbindComputeStorageBuffers();
    // Global buffers are bound to consecutive storage buffer indices starting at 0.
    u32 ssbo_index = 0;
    for (const auto& buffer : entries.global_buffers) {
        buffer_cache.BindComputeStorageBuffer(ssbo_index, buffer.cbuf_index, buffer.cbuf_offset,
                                              buffer.is_written);
        ++ssbo_index;
    }
    buffer_cache.UpdateComputeBuffers();
    texture_cache.SynchronizeComputeDescriptors();
    // The four Setup* calls append to image_view_indices (and sampler_handles for textures).
    SetupComputeUniformTexels(entries);
    SetupComputeTextures(entries);
    SetupComputeStorageTexels(entries);
    SetupComputeImages(entries);
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillComputeImageViews(indices_span, image_view_ids);
    update_descriptor_queue.Acquire();
    buffer_cache.BindHostComputeBuffers();
    ImageViewId* image_view_id_ptr = image_view_ids.data();
    VkSampler* sampler_ptr = sampler_handles.data();
    PushImageDescriptors(entries, texture_cache, update_descriptor_queue, image_view_id_ptr,
                         sampler_ptr);
    const VkPipeline pipeline_handle = pipeline.GetHandle();
    const VkPipelineLayout pipeline_layout = pipeline.GetLayout();
    const VkDescriptorSet descriptor_set = pipeline.CommitDescriptorSet();
    // Grid dimensions and handles are captured by value for the recorded command.
    scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
                      grid_z = launch_desc.grid_dim_z, pipeline_handle, pipeline_layout,
                      descriptor_set](vk::CommandBuffer cmdbuf) {
        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_handle);
        if (descriptor_set) {
            cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
                                      DESCRIPTOR_SET, descriptor_set, nullptr);
        }
        cmdbuf.Dispatch(grid_x, grid_y, grid_z);
    });
 }
 void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
@ -716,52 +492,6 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
    return buffer_cache.DMACopy(src_address, dest_address, amount);
 }
 /// Gathers the texture and buffer bindings of every present graphics stage, updates the
 /// caches, then pushes the host bindings and image descriptors stage by stage.
 /// Stage N reads `shaders[N + 1]`; slot 0 is never read here.
 void RasterizerVulkan::SetupShaderDescriptors(
    const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) {
    image_view_indices.clear();
    sampler_handles.clear();

    // First pass: collect image view indices, sampler handles and storage buffer bindings.
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        Shader* const stage_shader = shaders[stage + 1];
        if (stage_shader == nullptr) {
            continue;
        }
        const ShaderEntries& stage_entries = stage_shader->GetEntries();
        SetupGraphicsUniformTexels(stage_entries, stage);
        SetupGraphicsTextures(stage_entries, stage);
        SetupGraphicsStorageTexels(stage_entries, stage);
        SetupGraphicsImages(stage_entries, stage);
        buffer_cache.SetEnabledUniformBuffers(stage, stage_entries.enabled_uniform_buffers);
        buffer_cache.UnbindGraphicsStorageBuffers(stage);
        u32 binding = 0;
        for (const auto& global_buffer : stage_entries.global_buffers) {
            buffer_cache.BindGraphicsStorageBuffer(stage, binding++, global_buffer.cbuf_index,
                                                   global_buffer.cbuf_offset,
                                                   global_buffer.is_written);
        }
    }

    buffer_cache.UpdateGraphicsBuffers(is_indexed);
    const std::span indices_span(image_view_indices.data(), image_view_indices.size());
    texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
    buffer_cache.BindHostGeometryBuffers(is_indexed);
    update_descriptor_queue.Acquire();

    // Second pass: walk the resolved image views and samplers in the order they were gathered.
    ImageViewId* view_cursor = image_view_ids.data();
    VkSampler* sampler_cursor = sampler_handles.data();
    for (size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) {
        // Skip VertexA stage
        Shader* const stage_shader = shaders[stage + 1];
        if (stage_shader == nullptr) {
            continue;
        }
        buffer_cache.BindHostStageBuffers(stage);
        PushImageDescriptors(stage_shader->GetEntries(), texture_cache, update_descriptor_queue,
                             view_cursor, sampler_cursor);
    }
 }
 void RasterizerVulkan::UpdateDynamicStates() {
    auto& regs = maxwell3d.regs;
    UpdateViewportsState(regs);
@ -810,89 +540,6 @@ void RasterizerVulkan::EndTransformFeedback() {
        [](vk::CommandBuffer cmdbuf) { cmdbuf.EndTransformFeedbackEXT(0, 0, nullptr, nullptr); });
 }
 void RasterizerVulkan::SetupGraphicsUniformTexels(const ShaderEntries& entries, size_t stage) {
    const auto& regs = maxwell3d.regs;
    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    for (const auto& entry : entries.uniform_texels) {
        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, size_t stage) {
    const auto& regs = maxwell3d.regs;
    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    for (const auto& entry : entries.samplers) {
        for (size_t index = 0; index < entry.size; ++index) {
            const TextureHandle handle =
                GetTextureInfo(maxwell3d, via_header_index, entry, stage, index);
            image_view_indices.push_back(handle.image);
            Sampler* const sampler = texture_cache.GetGraphicsSampler(handle.sampler);
            sampler_handles.push_back(sampler->Handle());
        }
    }
 }
 void RasterizerVulkan::SetupGraphicsStorageTexels(const ShaderEntries& entries, size_t stage) {
    const auto& regs = maxwell3d.regs;
    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    for (const auto& entry : entries.storage_texels) {
        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, size_t stage) {
    const auto& regs = maxwell3d.regs;
    const bool via_header_index = regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex;
    for (const auto& entry : entries.images) {
        const TextureHandle handle = GetTextureInfo(maxwell3d, via_header_index, entry, stage);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::SetupComputeUniformTexels(const ShaderEntries& entries) {
    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
    for (const auto& entry : entries.uniform_texels) {
        const TextureHandle handle =
            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) {
    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
    for (const auto& entry : entries.samplers) {
        for (size_t index = 0; index < entry.size; ++index) {
            const TextureHandle handle = GetTextureInfo(kepler_compute, via_header_index, entry,
                                                        COMPUTE_SHADER_INDEX, index);
            image_view_indices.push_back(handle.image);
            Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
            sampler_handles.push_back(sampler->Handle());
        }
    }
 }
 void RasterizerVulkan::SetupComputeStorageTexels(const ShaderEntries& entries) {
    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
    for (const auto& entry : entries.storage_texels) {
        const TextureHandle handle =
            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) {
    const bool via_header_index = kepler_compute.launch_description.linked_tsc;
    for (const auto& entry : entries.images) {
        const TextureHandle handle =
            GetTextureInfo(kepler_compute, via_header_index, entry, COMPUTE_SHADER_INDEX);
        image_view_indices.push_back(handle.image);
    }
 }
 void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs) {
    if (!state_tracker.TouchViewports()) {
        return;
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@ -28,7 +28,6 @@
 #include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
 #include "video_core/renderer_vulkan/vk_texture_cache.h"
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/shader/async_shaders.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@ -73,7 +72,7 @@ public:
    void Draw(bool is_indexed, bool is_instanced) override;
    void Clear() override;
-    void DispatchCompute(GPUVAddr code_addr) override;
+    void DispatchCompute() override;
    void ResetCounter(VideoCore::QueryType type) override;
    void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
    void BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, u32 size) override;
@ -103,19 +102,6 @@ public:
    bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                           u32 pixel_stride) override;
    /// Returns a mutable reference to the `async_shaders` member.
    VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
        return async_shaders;
    }
    /// Returns a read-only reference to the `async_shaders` member.
    const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
        return async_shaders;
    }
    /// Maximum supported size that a constbuffer can have in bytes.
    static constexpr size_t MaxConstbufferSize = 0x10000;
    static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
                  "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 private:
    static constexpr size_t MAX_TEXTURES = 192;
    static constexpr size_t MAX_IMAGES = 48;
@ -125,40 +111,12 @@ private:
    void FlushWork();
    /// Setup descriptors in the graphics pipeline.
    void SetupShaderDescriptors(const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders,
                                bool is_indexed);
    void UpdateDynamicStates();
    void BeginTransformFeedback();
    void EndTransformFeedback();
    /// Setup uniform texels in the graphics pipeline.
    void SetupGraphicsUniformTexels(const ShaderEntries& entries, std::size_t stage);
    /// Setup textures in the graphics pipeline.
    void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
    /// Setup storage texels in the graphics pipeline.
    void SetupGraphicsStorageTexels(const ShaderEntries& entries, std::size_t stage);
    /// Setup images in the graphics pipeline.
    void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
    /// Setup texel buffers in the compute pipeline.
    void SetupComputeUniformTexels(const ShaderEntries& entries);
    /// Setup textures in the compute pipeline.
    void SetupComputeTextures(const ShaderEntries& entries);
    /// Setup storage texels in the compute pipeline.
    void SetupComputeStorageTexels(const ShaderEntries& entries);
    /// Setup images in the compute pipeline.
    void SetupComputeImages(const ShaderEntries& entries);
    void UpdateViewportsState(Tegra::Engines::Maxwell3D::Regs& regs);
    void UpdateScissorsState(Tegra::Engines::Maxwell3D::Regs& regs);
    void UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs);
@ -198,13 +156,12 @@ private:
    TextureCache texture_cache;
    BufferCacheRuntime buffer_cache_runtime;
    BufferCache buffer_cache;
-    VKPipelineCache pipeline_cache;
+    PipelineCache pipeline_cache;
    VKQueryCache query_cache;
    AccelerateDMA accelerate_dma;
    VKFenceManager fence_manager;
    vk::Event wfi_event;
    VideoCommon::Shader::AsyncShaders async_shaders;
    boost::container::static_vector<u32, MAX_IMAGE_VIEWS> image_view_indices;
    std::array<VideoCommon::ImageViewId, MAX_IMAGE_VIEWS> image_view_ids;
--- a/src/video_core/shader/ast.cpp
+++ b/src/video_core/shader/ast.cpp
@ -1,752 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <string>
 #include <string_view>
 #include <fmt/format.h>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/expr.h"
 namespace VideoCommon::Shader {
 // Defaulted constructor; members take their in-class or default-constructed values.
 ASTZipper::ASTZipper() = default;
 void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
    ASSERT(new_first->manager == nullptr);
    first = new_first;
    last = new_first;
    ASTNode current = first;
    while (current) {
        current->manager = this;
        current->parent = parent;
        last = current;
        current = current->next;
    }
 }
 void ASTZipper::PushBack(const ASTNode new_node) {
    ASSERT(new_node->manager == nullptr);
    new_node->previous = last;
    if (last) {
        last->next = new_node;
    }
    new_node->next.reset();
    last = new_node;
    if (!first) {
        first = new_node;
    }
    new_node->manager = this;
 }
 // Prepends new_node before the current head and takes ownership of it.
 // The node must not belong to any zipper yet (manager == nullptr).
 void ASTZipper::PushFront(const ASTNode new_node) {
    ASSERT(new_node->manager == nullptr);
    new_node->previous.reset();
    new_node->next = first;
    if (first) {
        first->previous = new_node;
    }
    // Only an empty list gets a new tail. Comparing `last == first` also matched a
    // one-element list and moved the tail onto the new head, leaving `last->next` non-null.
    if (!last) {
        last = new_node;
    }
    first = new_node;
    new_node->manager = this;
 }
 void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
    ASSERT(new_node->manager == nullptr);
    if (!at_node) {
        PushFront(new_node);
        return;
    }
    const ASTNode next = at_node->next;
    if (next) {
        next->previous = new_node;
    }
    new_node->previous = at_node;
    if (at_node == last) {
        last = new_node;
    }
    new_node->next = next;
    at_node->next = new_node;
    new_node->manager = this;
 }
 void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
    ASSERT(new_node->manager == nullptr);
    if (!at_node) {
        PushBack(new_node);
        return;
    }
    const ASTNode previous = at_node->previous;
    if (previous) {
        previous->next = new_node;
    }
    new_node->next = at_node;
    if (at_node == first) {
        first = new_node;
    }
    new_node->previous = previous;
    at_node->previous = new_node;
    new_node->manager = this;
 }
 // Cuts the list just before `node`, releasing `node` and everything after it.
 // Every released node has its manager and parent cleared, including when `node` is the
 // head and the whole list is released. The head case used to return early and leave the
 // nodes pointing at this zipper, unlike every other detach path.
 void ASTZipper::DetachTail(ASTNode node) {
    ASSERT(node->manager == this);
    if (node == first) {
        // The whole list is detached; the zipper becomes empty.
        first.reset();
        last.reset();
    } else {
        last = node->previous;
        last->next.reset();
        node->previous.reset();
    }
    ASTNode current = std::move(node);
    while (current) {
        current->manager = nullptr;
        current->parent.reset();
        current = current->next;
    }
 }
 void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
    ASSERT(start->manager == this && end->manager == this);
    if (start == end) {
        DetachSingle(start);
        return;
    }
    const ASTNode prev = start->previous;
    const ASTNode post = end->next;
    if (!prev) {
        first = post;
    } else {
        prev->next = post;
    }
    if (!post) {
        last = prev;
    } else {
        post->previous = prev;
    }
    start->previous.reset();
    end->next.reset();
    ASTNode current = start;
    bool found = false;
    while (current) {
        current->manager = nullptr;
        current->parent.reset();
        found |= current == end;
        current = current->next;
    }
    ASSERT(found);
 }
 void ASTZipper::DetachSingle(const ASTNode node) {
    ASSERT(node->manager == this);
    const ASTNode prev = node->previous;
    const ASTNode post = node->next;
    node->previous.reset();
    node->next.reset();
    if (!prev) {
        first = post;
    } else {
        prev->next = post;
    }
    if (!post) {
        last = prev;
    } else {
        post->previous = prev;
    }
    node->manager = nullptr;
    node->parent.reset();
 }
 void ASTZipper::Remove(const ASTNode node) {
    ASSERT(node->manager == this);
    const ASTNode next = node->next;
    const ASTNode previous = node->previous;
    if (previous) {
        previous->next = next;
    }
    if (next) {
        next->previous = previous;
    }
    node->parent.reset();
    node->manager = nullptr;
    if (node == last) {
        last = previous;
    }
    if (node == first) {
        first = next;
    }
 }
 // Visitor that renders an expression tree into a string, accumulated in `inner`.
 class ExprPrinter final {
 public:
    void operator()(const ExprAnd& expr) {
        AppendBinary(expr, " && ");
    }
    void operator()(const ExprOr& expr) {
        AppendBinary(expr, " || ");
    }
    void operator()(const ExprNot& expr) {
        inner.append("!");
        std::visit(*this, *expr.operand1);
    }
    void operator()(const ExprPredicate& expr) {
        inner.append(fmt::format("P{}", expr.predicate));
    }
    void operator()(const ExprCondCode& expr) {
        inner.append(fmt::format("CC{}", expr.cc));
    }
    void operator()(const ExprVar& expr) {
        inner.append(fmt::format("V{}", expr.var_index));
    }
    void operator()(const ExprBoolean& expr) {
        inner.append(expr.value ? "true" : "false");
    }
    void operator()(const ExprGprEqual& expr) {
        inner.append(fmt::format("(gpr_{} == {})", expr.gpr, expr.value));
    }
    // Text produced by the visits so far.
    const std::string& GetResult() const {
        return inner;
    }
 private:
    // Emits "( <operand1><op><operand2>)" for a two-operand expression.
    template <typename BinaryExpr>
    void AppendBinary(const BinaryExpr& expr, std::string_view op) {
        inner.append("( ");
        std::visit(*this, *expr.operand1);
        inner.append(op);
        std::visit(*this, *expr.operand2);
        inner.push_back(')');
    }
    std::string inner;
 };
 class ASTPrinter {
 public:
    void operator()(const ASTProgram& ast) {
        scope++;
        inner += "program {\n";
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        inner += "}\n";
        scope--;
    }
    void operator()(const ASTIfThen& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}if ({}) {{\n", Indent(), expr_parser.GetResult());
        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;
        inner += fmt::format("{}}}\n", Indent());
    }
    void operator()(const ASTIfElse& ast) {
        inner += Indent();
        inner += "else {\n";
        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;
        inner += Indent();
        inner += "}\n";
    }
    void operator()(const ASTBlockEncoded& ast) {
        inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
    }
    void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
        inner += Indent();
        inner += "Block;\n";
    }
    void operator()(const ASTVarSet& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, expr_parser.GetResult());
    }
    void operator()(const ASTLabel& ast) {
        inner += fmt::format("Label_{}:\n", ast.index);
    }
    void operator()(const ASTGoto& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner +=
            fmt::format("{}({}) -> goto Label_{};\n", Indent(), expr_parser.GetResult(), ast.label);
    }
    void operator()(const ASTDoWhile& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}do {{\n", Indent());
        scope++;
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
        scope--;
        inner += fmt::format("{}}} while ({});\n", Indent(), expr_parser.GetResult());
    }
    void operator()(const ASTReturn& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}({}) -> {};\n", Indent(), expr_parser.GetResult(),
                             ast.kills ? "discard" : "exit");
    }
    void operator()(const ASTBreak& ast) {
        ExprPrinter expr_parser{};
        std::visit(expr_parser, *ast.condition);
        inner += fmt::format("{}({}) -> break;\n", Indent(), expr_parser.GetResult());
    }
    void Visit(const ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
    }
    const std::string& GetResult() const {
        return inner;
    }
 private:
    std::string_view Indent() {
        if (space_segment_scope == scope) {
            return space_segment;
        }
        // Ensure that we don't exceed our view.
        ASSERT(scope * 2 < spaces.size());
        space_segment = spaces.substr(0, scope * 2);
        space_segment_scope = scope;
        return space_segment;
    }
    std::string inner{};
    std::string_view space_segment;
    u32 scope{};
    u32 space_segment_scope{};
    static constexpr std::string_view spaces{"                                    "};
 };
 // Renders the tree rooted at main_node with ASTPrinter and returns a copy of the text.
 std::string ASTManager::Print() const {
    ASTPrinter printer{};
    printer.Visit(main_node);
    return printer.GetResult();
 }
 // Stores the two decompilation flags; nothing else is set up here (see Init()).
 ASTManager::ASTManager(bool do_full_decompile, bool disable_else_derivation_)
    : full_decompile{do_full_decompile}, disable_else_derivation{disable_else_derivation_} {}
 // Calls Clear() on destruction.
 ASTManager::~ASTManager() {
    Clear();
 }
 // Creates the root ASTProgram node (with no parent), caches a pointer to its ASTProgram
 // payload in `program`, and builds the constant-false expression.
 void ASTManager::Init() {
    main_node = ASTBase::Make<ASTProgram>(ASTNode{});
    program = std::get_if<ASTProgram>(main_node->GetInnerData());
    false_condition = MakeExpr<ExprBoolean>(false);
 }
 // Registers a label for `address` if it has none yet, growing `labels` by one slot.
 void ASTManager::DeclareLabel(u32 address) {
    const bool is_new = labels_map.emplace(address, labels_count).second;
    if (!is_new) {
        return;
    }
    ++labels_count;
    labels.resize(labels_count);
 }
 // Creates the label node mapped to `address`, records it in `labels` and appends it to
 // the program.
 void ASTManager::InsertLabel(u32 address) {
    const u32 label_index = labels_map[address];
    const ASTNode label_node = ASTBase::Make<ASTLabel>(main_node, label_index);
    labels[label_index] = label_node;
    program->nodes.PushBack(label_node);
 }
 // Appends a conditional goto targeting the label mapped to `address` and remembers it in
 // `gotos` for Decompile().
 void ASTManager::InsertGoto(Expr condition, u32 address) {
    const u32 label_index = labels_map[address];
    const ASTNode jump = ASTBase::Make<ASTGoto>(main_node, std::move(condition), label_index);
    gotos.push_back(jump);
    program->nodes.PushBack(jump);
 }
 // Appends an encoded block node covering [start_address, end_address] to the program.
 void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
    ASTNode encoded_block = ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address);
    program->nodes.PushBack(std::move(encoded_block));
 }
 // Appends a conditional return node to the program; `kills` is forwarded to the node.
 void ASTManager::InsertReturn(Expr condition, bool kills) {
    ASTNode return_node = ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills);
    program->nodes.PushBack(std::move(return_node));
 }
 // The decompile algorithm is based on
 // "Taming control flow: A structured approach to eliminating goto statements"
 // by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
 // on the same structured level as the label which they jump to. This is done,
 // through outward/inward movements and lifting. Once they are at the same
 // level, you can enclose them in an "if" structure or a "do-while" structure.
 void ASTManager::Decompile() {
    auto it = gotos.begin();
    while (it != gotos.end()) {
        const ASTNode goto_node = *it;
        const auto label_index = goto_node->GetGotoLabel();
        if (!label_index) {
            return;
        }
        const ASTNode label = labels[*label_index];
        if (!full_decompile) {
            // We only decompile backward jumps
            if (!IsBackwardsJump(goto_node, label)) {
                it++;
                continue;
            }
        }
        if (IndirectlyRelated(goto_node, label)) {
            while (!DirectlyRelated(goto_node, label)) {
                MoveOutward(goto_node);
            }
        }
        if (DirectlyRelated(goto_node, label)) {
            u32 goto_level = goto_node->GetLevel();
            const u32 label_level = label->GetLevel();
            while (label_level < goto_level) {
                MoveOutward(goto_node);
                goto_level--;
            }
            // TODO(Blinkhawk): Implement Lifting and Inward Movements
        }
        if (label->GetParent() == goto_node->GetParent()) {
            bool is_loop = false;
            ASTNode current = goto_node->GetPrevious();
            while (current) {
                if (current == label) {
                    is_loop = true;
                    break;
                }
                current = current->GetPrevious();
            }
            if (is_loop) {
                EncloseDoWhile(goto_node, label);
            } else {
                EncloseIfThen(goto_node, label);
            }
            it = gotos.erase(it);
            continue;
        }
        it++;
    }
    if (full_decompile) {
        for (const ASTNode& label : labels) {
            auto& manager = label->GetManager();
            manager.Remove(label);
        }
        labels.clear();
    } else {
        auto label_it = labels.begin();
        while (label_it != labels.end()) {
            bool can_remove = true;
            ASTNode label = *label_it;
            for (const ASTNode& goto_node : gotos) {
                const auto label_index = goto_node->GetGotoLabel();
                if (!label_index) {
                    return;
                }
                ASTNode& glabel = labels[*label_index];
                if (glabel == label) {
                    can_remove = false;
                    break;
                }
            }
            if (can_remove) {
                label->MarkLabelUnused();
            }
        }
    }
 }
 // Reports whether the label sits before the goto. Both nodes are first lifted to ancestors
 // sharing the same parent; the label side must then appear among the previous siblings of
 // the goto side.
 bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
    u32 goto_level = goto_node->GetLevel();
    u32 label_level = label_node->GetLevel();
    // Bring the deeper node up to the level of the shallower one.
    for (; goto_level > label_level; --goto_level) {
        goto_node = goto_node->GetParent();
    }
    for (; label_level > goto_level; --label_level) {
        label_node = label_node->GetParent();
    }
    // Climb both in lockstep until they become siblings.
    while (goto_node->GetParent() != label_node->GetParent()) {
        goto_node = goto_node->GetParent();
        label_node = label_node->GetParent();
    }
    for (ASTNode sibling = goto_node->GetPrevious(); sibling; sibling = sibling->GetPrevious()) {
        if (sibling == label_node) {
            return true;
        }
    }
    return false;
 }
 // Two nodes are indirectly related when they are neither siblings nor directly related.
 bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
    if (first->GetParent() == second->GetParent()) {
        return false;
    }
    return !DirectlyRelated(first, second);
 }
 bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
    if (first->GetParent() == second->GetParent()) {
        return false;
    }
    const u32 first_level = first->GetLevel();
    const u32 second_level = second->GetLevel();
    u32 min_level;
    u32 max_level;
    ASTNode max;
    ASTNode min;
    if (first_level > second_level) {
        min_level = second_level;
        min = second;
        max_level = first_level;
        max = first;
    } else {
        min_level = first_level;
        min = first;
        max_level = second_level;
        max = second;
    }
    while (max_level > min_level) {
        max_level--;
        max = max->GetParent();
    }
    return min->GetParent() == max->GetParent();
 }
 // Debug helper: logs the printed AST under the given state name at critical level, then
 // runs SanityCheck().
 void ASTManager::ShowCurrentState(std::string_view state) const {
    LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print());
    SanityCheck();
 }
 // Logs a critical message for every label that has no parent.
 void ASTManager::SanityCheck() const {
    for (const auto& label : labels) {
        if (label->GetParent()) {
            continue;
        }
        LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
    }
 }
 // Replaces a goto that jumps to a preceding label in the same sibling list with a do-while
 // node: everything from the node after the label up to the goto becomes the loop body and the
 // goto condition becomes the loop condition.
 void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode loop_start = label->GetNext();
    // The goto directly follows the label: there is no body to enclose, just drop the goto
    if (loop_start == goto_node) {
        zipper.Remove(goto_node);
        return;
    }
    const ASTNode parent = label->GetParent();
    const Expr condition = goto_node->GetGotoCondition();
    // Cut [loop_start, goto_node] out of the sibling list; it becomes the loop body
    zipper.DetachSegment(loop_start, goto_node);
    const ASTNode do_while_node = ASTBase::Make<ASTDoWhile>(parent, condition);
    ASTZipper* sub_zipper = do_while_node->GetSubNodes();
    sub_zipper->Init(loop_start, do_while_node);
    // The loop takes the place of the detached segment, right after the label
    zipper.InsertAfter(do_while_node, label);
    // The goto was detached together with the body; it is now expressed by the loop condition
    sub_zipper->Remove(goto_node);
 }
 // Replaces a goto that jumps to a later label in the same sibling list with a conditional node:
 // everything from the goto up to the node before the label becomes the body. The body is
 // guarded by the negated goto condition, or becomes an else branch when the node right before
 // the goto is an if-then with an equal condition and else derivation is enabled.
 void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode if_end = label->GetPrevious();
    // The label directly follows the goto: there is nothing to skip, just drop the goto
    if (if_end == goto_node) {
        zipper.Remove(goto_node);
        return;
    }
    const ASTNode prev = goto_node->GetPrevious();
    const Expr condition = goto_node->GetGotoCondition();
    bool do_else = false;
    // prev is null when the goto is the first node of its list, so it must be checked before use
    if (!disable_else_derivation && prev && prev->IsIfThen()) {
        const Expr if_condition = prev->GetIfCondition();
        do_else = ExprAreEqual(if_condition, condition);
    }
    const ASTNode parent = label->GetParent();
    // Cut [goto_node, if_end] out of the sibling list; it becomes the conditional body
    zipper.DetachSegment(goto_node, if_end);
    ASTNode if_node;
    if (do_else) {
        if_node = ASTBase::Make<ASTIfElse>(parent);
    } else {
        // The body only runs when the jump would not have been taken
        Expr neg_condition = MakeExprNot(condition);
        if_node = ASTBase::Make<ASTIfThen>(parent, neg_condition);
    }
    ASTZipper* sub_zipper = if_node->GetSubNodes();
    sub_zipper->Init(goto_node, if_node);
    zipper.InsertAfter(if_node, prev);
    // The goto was detached together with the body; it is now expressed by the conditional
    sub_zipper->Remove(goto_node);
 }
 // Moves a goto one nesting level outwards, out of its enclosing loop or if/else body. The goto
 // condition is stored in a fresh variable that is initialized with false_condition before the
 // enclosing construct, and the relocated goto tests that variable instead.
 void ASTManager::MoveOutward(ASTNode goto_node) {
    // zipper is the sibling list containing the goto, zipper2 the one containing its parent
    ASTZipper& zipper = goto_node->GetManager();
    const ASTNode parent = goto_node->GetParent();
    ASTZipper& zipper2 = parent->GetManager();
    const ASTNode grandpa = parent->GetParent();
    const bool is_loop = parent->IsLoop();
    const bool is_else = parent->IsIfElse();
    const bool is_if = parent->IsIfThen();
    // Neighbours are captured before the goto is detached from its list
    const ASTNode prev = goto_node->GetPrevious();
    const ASTNode post = goto_node->GetNext();
    const Expr condition = goto_node->GetGotoCondition();
    zipper.DetachSingle(goto_node);
    if (is_loop) {
        const u32 var_index = NewVariable();
        const Expr var_condition = MakeExpr<ExprVar>(var_index);
        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
        // Initialize the variable before the loop, assign it where the goto used to be
        zipper2.InsertBefore(var_node_init, parent);
        zipper.InsertAfter(var_node, prev);
        goto_node->SetGotoCondition(var_condition);
        // Leave the loop when the variable is set, so the relocated goto gets to run
        const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
        zipper.InsertAfter(break_node, var_node);
    } else if (is_if || is_else) {
        const u32 var_index = NewVariable();
        const Expr var_condition = MakeExpr<ExprVar>(var_index);
        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(parent, var_index, false_condition);
        if (is_if) {
            zipper2.InsertBefore(var_node_init, parent);
        } else {
            // For an else branch the initialization goes before the node preceding it
            zipper2.InsertBefore(var_node_init, parent->GetPrevious());
        }
        zipper.InsertAfter(var_node, prev);
        goto_node->SetGotoCondition(var_condition);
        if (post) {
            // Whatever followed the goto only runs when the variable is not set
            zipper.DetachTail(post);
            const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
            ASTZipper* sub_zipper = if_node->GetSubNodes();
            sub_zipper->Init(post, if_node);
            zipper.InsertAfter(if_node, var_node);
        }
    } else {
        UNREACHABLE();
    }
    const ASTNode next = parent->GetNext();
    // An if-then followed by its else branch: the goto is placed after the else
    if (is_if && next && next->IsIfElse()) {
        zipper2.InsertAfter(goto_node, next);
        goto_node->SetParent(grandpa);
        return;
    }
    zipper2.InsertAfter(goto_node, parent);
    goto_node->SetParent(grandpa);
 }
 class ASTClearer {
 public:
    ASTClearer() = default;
    void operator()(const ASTProgram& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(const ASTIfThen& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(const ASTIfElse& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
    void operator()(ASTBlockDecoded& ast) {
        ast.nodes.clear();
    }
    void operator()([[maybe_unused]] const ASTVarSet& ast) {}
    void operator()([[maybe_unused]] const ASTLabel& ast) {}
    void operator()([[maybe_unused]] const ASTGoto& ast) {}
    void operator()(const ASTDoWhile& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()([[maybe_unused]] const ASTReturn& ast) {}
    void operator()([[maybe_unused]] const ASTBreak& ast) {}
    void Visit(const ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
        node->Clear();
    }
 };
 void ASTManager::Clear() {
    if (!main_node) {
        return;
    }
    ASTClearer clearer{};
    clearer.Visit(main_node);
    main_node.reset();
    program = nullptr;
    labels_map.clear();
    labels.clear();
    gotos.clear();
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/ast.h
+++ b/src/video_core/shader/ast.h
@ -1,398 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <functional>
 #include <list>
 #include <memory>
 #include <optional>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "video_core/shader/expr.h"
 #include "video_core/shader/node.h"
 namespace VideoCommon::Shader {
 class ASTBase;
 class ASTBlockDecoded;
 class ASTBlockEncoded;
 class ASTBreak;
 class ASTDoWhile;
 class ASTGoto;
 class ASTIfElse;
 class ASTIfThen;
 class ASTLabel;
 class ASTProgram;
 class ASTReturn;
 class ASTVarSet;
 using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
                             ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
 using ASTNode = std::shared_ptr<ASTBase>;
 /// Kinds of AST constructs that own a list of child nodes.
 /// NOTE(review): not referenced in the visible part of this header - confirm it is still used.
 enum class ASTZipperType : u32 {
    Program,
    IfThen,
    IfElse,
    Loop,
 };
 /// List of sibling AST nodes, tracked through its first and last node. ASTBase declares this
 /// class a friend, so it has access to the nodes' private link members.
 class ASTZipper final {
 public:
    explicit ASTZipper();
    /// NOTE(review): callers pass the first node of a detached run and the node that owns this
    /// list; presumably adopts the whole run - confirm against the definition.
    void Init(ASTNode first, ASTNode parent);
    /// Returns the first node of the list
    ASTNode GetFirst() const {
        return first;
    }
    /// Returns the last node of the list
    ASTNode GetLast() const {
        return last;
    }
    void PushBack(ASTNode new_node);
    void PushFront(ASTNode new_node);
    void InsertAfter(ASTNode new_node, ASTNode at_node);
    void InsertBefore(ASTNode new_node, ASTNode at_node);
    /// NOTE(review): presumably detaches node and everything after it - confirm.
    void DetachTail(ASTNode node);
    void DetachSingle(ASTNode node);
    /// NOTE(review): presumably detaches the inclusive run [start, end] - confirm.
    void DetachSegment(ASTNode start, ASTNode end);
    void Remove(ASTNode node);
    ASTNode first;
    ASTNode last;
 };
 /// AST payload that only holds a list of child nodes.
 /// NOTE(review): presumably the payload of the root node (see ASTManager::GetProgram) - confirm.
 class ASTProgram {
 public:
    ASTZipper nodes{};
 };
 /// AST payload of a conditional: a condition plus the list of child nodes it guards.
 class ASTIfThen {
 public:
    explicit ASTIfThen(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
    ASTZipper nodes{};
 };
 /// AST payload of an else branch: a list of child nodes without a condition of its own.
 /// NOTE(review): presumably always placed right after the matching ASTIfThen node - confirm.
 class ASTIfElse {
 public:
    ASTZipper nodes{};
 };
 /// AST payload of a block that is identified only by its start and end values.
 /// NOTE(review): presumably the address range passed to ASTManager::InsertBlock - confirm.
 class ASTBlockEncoded {
 public:
    explicit ASTBlockEncoded(u32 start_, u32 end_) : start{start_}, end{end_} {}
    u32 start;
    u32 end;
 };
 /// AST payload of a block holding a NodeBlock; ASTBase::TransformBlockEncoded installs it.
 class ASTBlockDecoded {
 public:
    explicit ASTBlockDecoded(NodeBlock&& new_nodes_) : nodes(std::move(new_nodes_)) {}
    NodeBlock nodes;
 };
 /// AST payload that assigns the value of a condition expression to the variable with the given
 /// index.
 class ASTVarSet {
 public:
    explicit ASTVarSet(u32 index_, Expr condition_)
        : index{index_}, condition{std::move(condition_)} {}
    u32 index;
    Expr condition;
 };
 /// AST payload of a jump target, identified by its label index.
 class ASTLabel {
 public:
    explicit ASTLabel(u32 index_) : index{index_} {}
    u32 index;
    /// Set through ASTBase::MarkLabelUnused(); starts out false
    bool unused{};
 };
 /// AST payload of a conditional jump: the condition plus the index of the target label.
 class ASTGoto {
 public:
    explicit ASTGoto(Expr condition_, u32 label_)
        : condition{std::move(condition_)}, label{label_} {}
    Expr condition;
    u32 label;
 };
 /// AST payload of a do-while loop: the loop condition plus the list of child nodes in its body.
 class ASTDoWhile {
 public:
    explicit ASTDoWhile(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
    ASTZipper nodes{};
 };
 /// AST payload of a conditional return.
 class ASTReturn {
 public:
    explicit ASTReturn(Expr condition_, bool kills_)
        : condition{std::move(condition_)}, kills{kills_} {}
    Expr condition;
    /// NOTE(review): presumably marks a return that discards the invocation - confirm.
    bool kills;
 };
 /// AST payload of a conditional break.
 class ASTBreak {
 public:
    explicit ASTBreak(Expr condition_) : condition{std::move(condition_)} {}
    Expr condition;
 };
 /// Node of the AST. Holds one ASTData payload plus the links to its parent, its siblings and
 /// the ASTZipper it belongs to; ASTZipper is a friend and has access to those links.
 class ASTBase {
 public:
    explicit ASTBase(ASTNode parent_, ASTData data_)
        : data{std::move(data_)}, parent{std::move(parent_)} {}
    /// Creates a node below parent whose payload is a U constructed from args
    template <class U, class... Args>
    static ASTNode Make(ASTNode parent, Args&&... args) {
        return std::make_shared<ASTBase>(std::move(parent),
                                         ASTData(U(std::forward<Args>(args)...)));
    }
    void SetParent(ASTNode new_parent) {
        parent = std::move(new_parent);
    }
    ASTNode& GetParent() {
        return parent;
    }
    const ASTNode& GetParent() const {
        return parent;
    }
    /// Returns the number of ancestors of this node
    u32 GetLevel() const {
        u32 level = 0;
        for (const ASTBase* ancestor = parent.get(); ancestor != nullptr;
             ancestor = ancestor->parent.get()) {
            ++level;
        }
        return level;
    }
    ASTData* GetInnerData() {
        return &data;
    }
    const ASTData* GetInnerData() const {
        return &data;
    }
    ASTNode GetNext() const {
        return next;
    }
    ASTNode GetPrevious() const {
        return previous;
    }
    ASTZipper& GetManager() {
        return *manager;
    }
    const ASTZipper& GetManager() const {
        return *manager;
    }
    /// Returns the target label index, or nothing when this node is not a goto
    std::optional<u32> GetGotoLabel() const {
        const auto* const goto_data = std::get_if<ASTGoto>(&data);
        if (goto_data == nullptr) {
            return std::nullopt;
        }
        return goto_data->label;
    }
    /// Returns the jump condition, or null when this node is not a goto
    Expr GetGotoCondition() const {
        const auto* const goto_data = std::get_if<ASTGoto>(&data);
        if (goto_data == nullptr) {
            return nullptr;
        }
        return goto_data->condition;
    }
    /// Flags the label as unused; does nothing when this node is not a label
    void MarkLabelUnused() {
        auto* const label_data = std::get_if<ASTLabel>(&data);
        if (label_data == nullptr) {
            return;
        }
        label_data->unused = true;
    }
    /// Returns the unused flag of a label; nodes that are not labels report true
    bool IsLabelUnused() const {
        const auto* const label_data = std::get_if<ASTLabel>(&data);
        if (label_data == nullptr) {
            return true;
        }
        return label_data->unused;
    }
    /// Returns the label index, or nothing when this node is not a label
    std::optional<u32> GetLabelIndex() const {
        const auto* const label_data = std::get_if<ASTLabel>(&data);
        if (label_data == nullptr) {
            return std::nullopt;
        }
        return label_data->index;
    }
    /// Returns the condition of an if-then, or null when this node is not an if-then
    Expr GetIfCondition() const {
        const auto* const if_data = std::get_if<ASTIfThen>(&data);
        if (if_data == nullptr) {
            return nullptr;
        }
        return if_data->condition;
    }
    /// Replaces the jump condition; does nothing when this node is not a goto
    void SetGotoCondition(Expr new_condition) {
        auto* const goto_data = std::get_if<ASTGoto>(&data);
        if (goto_data == nullptr) {
            return;
        }
        goto_data->condition = std::move(new_condition);
    }
    bool IsIfThen() const {
        return std::get_if<ASTIfThen>(&data) != nullptr;
    }
    bool IsIfElse() const {
        return std::get_if<ASTIfElse>(&data) != nullptr;
    }
    bool IsBlockEncoded() const {
        return std::get_if<ASTBlockEncoded>(&data) != nullptr;
    }
    /// Replaces the payload with an ASTBlockDecoded built from nodes
    void TransformBlockEncoded(NodeBlock&& nodes) {
        data = ASTBlockDecoded(std::move(nodes));
    }
    bool IsLoop() const {
        return std::get_if<ASTDoWhile>(&data) != nullptr;
    }
    /// Returns the child list of a program, if-then, if-else or do-while node, null otherwise
    ASTZipper* GetSubNodes() {
        if (auto* const program_data = std::get_if<ASTProgram>(&data)) {
            return &program_data->nodes;
        }
        if (auto* const if_data = std::get_if<ASTIfThen>(&data)) {
            return &if_data->nodes;
        }
        if (auto* const else_data = std::get_if<ASTIfElse>(&data)) {
            return &else_data->nodes;
        }
        if (auto* const loop_data = std::get_if<ASTDoWhile>(&data)) {
            return &loop_data->nodes;
        }
        return nullptr;
    }
    /// Drops every link this node holds to other nodes and to its list
    void Clear() {
        next.reset();
        previous.reset();
        parent.reset();
        manager = nullptr;
    }
 private:
    friend class ASTZipper;
    ASTData data;
    ASTNode parent;
    ASTNode next;
    ASTNode previous;
    ASTZipper* manager{};
 };
 /// Builds the AST out of labels, gotos, blocks and returns, and rewrites gotos into structured
 /// constructs. Movable but not copyable.
 class ASTManager final {
 public:
    explicit ASTManager(bool do_full_decompile, bool disable_else_derivation_);
    ~ASTManager();
    ASTManager(const ASTManager& o) = delete;
    ASTManager& operator=(const ASTManager& other) = delete;
    ASTManager(ASTManager&& other) noexcept = default;
    ASTManager& operator=(ASTManager&& other) noexcept = default;
    void Init();
    void DeclareLabel(u32 address);
    void InsertLabel(u32 address);
    void InsertGoto(Expr condition, u32 address);
    void InsertBlock(u32 start_address, u32 end_address);
    void InsertReturn(Expr condition, bool kills);
    std::string Print() const;
    void Decompile();
    /// Logs the printed AST under the given state name and runs SanityCheck()
    void ShowCurrentState(std::string_view state) const;
    /// Logs a critical message for every label without a parent
    void SanityCheck() const;
    /// Tears down the AST and forgets all labels and gotos
    void Clear();
    /// In full decompile mode the AST is fully decompiled once no goto is left. Otherwise the
    /// remaining gotos are tolerated as long as each has a label and none of them jumps backwards
    bool IsFullyDecompiled() const {
        if (full_decompile) {
            return gotos.empty();
        }
        for (const ASTNode& goto_node : gotos) {
            const std::optional<u32> label_index = goto_node->GetGotoLabel();
            if (!label_index || IsBackwardsJump(goto_node, labels[*label_index])) {
                return false;
            }
        }
        return true;
    }
    ASTNode GetProgram() const {
        return main_node;
    }
    /// Returns the number of variables handed out by NewVariable() so far
    u32 GetVariables() const {
        return variables;
    }
    const std::vector<ASTNode>& GetLabels() const {
        return labels;
    }
 private:
    bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
    bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
    bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
    void EncloseDoWhile(ASTNode goto_node, ASTNode label);
    void EncloseIfThen(ASTNode goto_node, ASTNode label);
    void MoveOutward(ASTNode goto_node);
    /// Hands out the next unused variable index
    u32 NewVariable() {
        return variables++;
    }
    bool full_decompile{};
    bool disable_else_derivation{};
    std::unordered_map<u32, u32> labels_map{};
    u32 labels_count{};
    std::vector<ASTNode> labels{};
    std::list<ASTNode> gotos{};
    u32 variables{};
    ASTProgram* program{};
    ASTNode main_node{};
    Expr false_condition{};
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/async_shaders.cpp
+++ b/src/video_core/shader/async_shaders.cpp
@ -1,234 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <condition_variable>
 #include <mutex>
 #include <thread>
 #include <vector>
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_base.h"
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/shader/async_shaders.h"
 namespace VideoCommon::Shader {
 AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window_) : emu_window(emu_window_) {}
 // Requests all workers to exit and detaches them instead of waiting for them.
 // NOTE(review): the detached workers were started with `this`; confirm that none of them can
 // still be running once the members are destroyed.
 AsyncShaders::~AsyncShaders() {
    KillWorkers();
 }
 void AsyncShaders::AllocateWorkers() {
    // Use at least one thread
    u32 num_workers = 1;
    // Deduce how many more threads we can use
    const u32 thread_count = std::thread::hardware_concurrency();
    if (thread_count >= 8) {
        // Increase async workers by 1 for every 2 threads >= 8
        num_workers += 1 + (thread_count - 8) / 2;
    }
    // If we already have workers queued, ignore
    if (num_workers == worker_threads.size()) {
        return;
    }
    // If workers already exist, clear them
    if (!worker_threads.empty()) {
        FreeWorkers();
    }
    // Create workers
    for (std::size_t i = 0; i < num_workers; i++) {
        context_list.push_back(emu_window.CreateSharedContext());
        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
                                    context_list[i].get());
    }
 }
 void AsyncShaders::FreeWorkers() {
    // Mark all threads to quit
    is_thread_exiting.store(true);
    cv.notify_all();
    for (auto& thread : worker_threads) {
        thread.join();
    }
    // Clear our shared contexts
    context_list.clear();
    // Clear our worker threads
    worker_threads.clear();
 }
 // Asks every worker to exit and detaches the threads instead of waiting for them, then drops
 // the shared contexts and the thread handles.
 // NOTE(review): detached workers keep using `this` and a raw pointer into context_list; confirm
 // they cannot still be running when those are destroyed.
 void AsyncShaders::KillWorkers() {
    is_thread_exiting.store(true);
    cv.notify_all();
    for (std::thread& worker : worker_threads) {
        worker.detach();
    }
    context_list.clear();
    worker_threads.clear();
 }
 // Reports whether the pending queue holds at least one entry. Takes no lock itself.
 bool AsyncShaders::HasWorkQueued() const {
    const bool queue_is_empty = pending_queue.empty();
    return !queue_is_empty;
 }
 // Reports whether at least one finished result is waiting to be collected.
 bool AsyncShaders::HasCompletedWork() const {
    // Readers only need shared access to the results
    std::shared_lock lock{completed_mutex};
    const bool nothing_finished = finished_work.empty();
    return !nothing_finished;
 }
 // Heuristic deciding from the current Maxwell3D registers whether a shader may be built
 // asynchronously.
 bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
    const auto& regs = gpu.Maxwell3D().regs;
    // Draws that use depth are assumed not to be one-off renders
    if (regs.zeta_enable) {
        return true;
    }
    // Very small index or vertex counts are assumed to be full screen quads. Their shaders are
    // usually used once to build a texture, so they have to be built synchronously
    const bool is_small_draw = regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6;
    return !is_small_draw;
 }
 std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
    std::vector<Result> results;
    {
        std::unique_lock lock{completed_mutex};
        results = std::move(finished_work);
        finished_work.clear();
    }
    return results;
 }
 // Queues an OpenGL shader build and wakes one worker. The backend is GLASM when the device
 // uses assembly shaders and OpenGL otherwise; all Vulkan-only fields are left empty.
 void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
                                     Tegra::Engines::ShaderType shader_type, u64 uid,
                                     std::vector<u64> code, std::vector<u64> code_b,
                                     u32 main_offset, CompilerSettings compiler_settings,
                                     const Registry& registry, VAddr cpu_addr) {
    std::unique_lock lock(queue_mutex);
    WorkerParams params{
        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
        .device = &device,
        .shader_type = shader_type,
        .uid = uid,
        .code = std::move(code),
        .code_b = std::move(code_b),
        .main_offset = main_offset,
        .compiler_settings = compiler_settings,
        .registry = registry,
        .cpu_address = cpu_addr,
        .pp_cache = nullptr,
        .vk_device = nullptr,
        .scheduler = nullptr,
        .descriptor_pool = nullptr,
        .update_descriptor_queue = nullptr,
        .bindings{},
        .program{},
        .key{},
        .num_color_buffers = 0,
    };
    pending_queue.push(std::move(params));
    cv.notify_one();
 }
 // Queues a Vulkan graphics pipeline build and wakes one worker; all OpenGL-only fields are
 // left empty. Only the addresses of device, scheduler, descriptor_pool and
 // update_descriptor_queue are stored, so they must stay valid until the work has been processed.
 void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::Device& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key, u32 num_color_buffers) {
    std::unique_lock lock(queue_mutex);
    WorkerParams params{
        .backend = Backend::Vulkan,
        .device = nullptr,
        .shader_type{},
        .uid = 0,
        .code{},
        .code_b{},
        .main_offset = 0,
        .compiler_settings{},
        .registry{},
        .cpu_address = 0,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
        .num_color_buffers = num_color_buffers,
    };
    pending_queue.push(std::move(params));
    cv.notify_one();
 }
 // Body of every worker thread: takes queued work until the pool is asked to exit. OpenGL and
 // GLASM work is compiled with the given context acquired and published through finished_work;
 // Vulkan work builds a pipeline and hands it straight to the pipeline cache.
 void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
        std::unique_lock lock{queue_mutex};
        // Sleep until there is work to take or the pool is shutting down
        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
        if (is_thread_exiting) {
            return;
        }
        // Nothing to take after all: release the lock and wait again
        if (pending_queue.empty()) {
            continue;
        }
        WorkerParams work = std::move(pending_queue.front());
        pending_queue.pop();
        // The build itself runs without the queue lock
        lock.unlock();
        switch (work.backend) {
        case Backend::OpenGL:
        case Backend::GLASM: {
            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
            const auto scope = context->Acquire();
            auto program =
                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
            Result finished{};
            finished.backend = work.backend;
            finished.cpu_address = work.cpu_address;
            finished.uid = work.uid;
            finished.code = std::move(work.code);
            finished.code_b = std::move(work.code_b);
            finished.shader_type = work.shader_type;
            if (work.backend == Backend::OpenGL) {
                finished.program.opengl = std::move(program->source_program);
            } else {
                finished.program.glasm = std::move(program->assembly_program);
            }
            {
                std::unique_lock complete_lock(completed_mutex);
                finished_work.push_back(std::move(finished));
            }
            break;
        }
        case Backend::Vulkan: {
            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
                *work.vk_device, *work.scheduler, *work.descriptor_pool,
                *work.update_descriptor_queue, work.key, work.bindings, work.program,
                work.num_color_buffers);
            work.pp_cache->EmplacePipeline(std::move(pipeline));
            break;
        }
        }
    }
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/async_shaders.h
+++ b/src/video_core/shader/async_shaders.h
@ -1,138 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <condition_variable>
 #include <memory>
 #include <shared_mutex>
 #include <thread>
 #include <glad/glad.h>
 #include "common/common_types.h"
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_vulkan/vk_pipeline_cache.h"
 #include "video_core/renderer_vulkan/vk_scheduler.h"
 #include "video_core/vulkan_common/vulkan_device.h"
 namespace Core::Frontend {
 class EmuWindow;
 class GraphicsContext;
 } // namespace Core::Frontend
 namespace Tegra {
 class GPU;
 }
 namespace Vulkan {
 class VKPipelineCache;
 }
 namespace VideoCommon::Shader {
 /// Pool of worker threads that build OpenGL/GLASM shaders and Vulkan graphics pipelines in the
 /// background.
 class AsyncShaders {
 public:
    /// Backend a unit of work is built for
    enum class Backend {
        OpenGL,
        GLASM,
        Vulkan,
    };
    /// Program produced by an OpenGL build; which member is filled depends on Result::backend
    struct ResultPrograms {
        OpenGL::OGLProgram opengl;
        OpenGL::OGLAssemblyProgram glasm;
    };
    /// Outcome of a finished OpenGL or GLASM build
    struct Result {
        u64 uid;
        VAddr cpu_address;
        Backend backend;
        ResultPrograms program;
        std::vector<u64> code;
        std::vector<u64> code_b;
        Tegra::Engines::ShaderType shader_type;
    };
    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window_);
    ~AsyncShaders();
    /// Start up shader worker threads
    void AllocateWorkers();
    /// Ask all worker threads to exit, wait for them and release their shared contexts
    void FreeWorkers();
    /// Ask all worker threads to exit and detach them without waiting
    void KillWorkers();
    /// Check to see if any shaders have actually been compiled
    [[nodiscard]] bool HasCompletedWork() const;
    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot build
    /// every shader async as some shaders are only built and executed once. We try to "guess" which
    /// shader would be used only once
    [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
    /// Pulls completed compiled shaders
    [[nodiscard]] std::vector<Result> GetCompletedWork();
    /// Queues an OpenGL or GLASM shader build
    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
                           CompilerSettings compiler_settings, const Registry& registry,
                           VAddr cpu_addr);
    /// Queues a Vulkan graphics pipeline build
    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::Device& device,
                           Vulkan::VKScheduler& scheduler,
                           Vulkan::VKDescriptorPool& descriptor_pool,
                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                           std::vector<VkDescriptorSetLayoutBinding> bindings,
                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key,
                           u32 num_color_buffers);
 private:
    /// Entry point of every worker thread
    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
    /// Check our worker queue to see if we have any work queued already
    [[nodiscard]] bool HasWorkQueued() const;
    /// One queued unit of work; only the fields of the selected backend are meaningful
    struct WorkerParams {
        Backend backend;
        // For OGL
        const OpenGL::Device* device;
        Tegra::Engines::ShaderType shader_type;
        u64 uid;
        std::vector<u64> code;
        std::vector<u64> code_b;
        u32 main_offset;
        CompilerSettings compiler_settings;
        std::optional<Registry> registry;
        VAddr cpu_address;
        // For Vulkan
        Vulkan::VKPipelineCache* pp_cache;
        const Vulkan::Device* vk_device;
        Vulkan::VKScheduler* scheduler;
        Vulkan::VKDescriptorPool* descriptor_pool;
        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
        std::vector<VkDescriptorSetLayoutBinding> bindings;
        Vulkan::SPIRVProgram program;
        Vulkan::GraphicsPipelineCacheKey key;
        u32 num_color_buffers;
    };
    /// Wakes workers when work is queued or the pool is asked to exit
    std::condition_variable cv;
    /// Guards pending_queue
    mutable std::mutex queue_mutex;
    /// Guards finished_work
    mutable std::shared_mutex completed_mutex;
    std::atomic<bool> is_thread_exiting{};
    /// One shared graphics context per worker thread, in the same order as worker_threads
    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
    std::vector<std::thread> worker_threads;
    std::queue<WorkerParams> pending_queue;
    std::vector<Result> finished_work;
    Core::Frontend::EmuWindow& emu_window;
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/compiler_settings.cpp
+++ b/src/video_core/shader/compiler_settings.cpp
@ -1,26 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "video_core/shader/compiler_settings.h"
 namespace VideoCommon::Shader {
 // Returns a human readable description of a compile depth; values outside the enumeration
 // yield "Unknown Compiler Process".
 std::string CompileDepthAsString(const CompileDepth cd) {
    const char* description = "Unknown Compiler Process";
    switch (cd) {
    case CompileDepth::BruteForce:
        description = "Brute Force Compile";
        break;
    case CompileDepth::FlowStack:
        description = "Simple Flow Stack Mode";
        break;
    case CompileDepth::NoFlowStack:
        description = "Remove Flow Stack";
        break;
    case CompileDepth::DecompileBackwards:
        description = "Decompile Backward Jumps";
        break;
    case CompileDepth::FullDecompile:
        description = "Full Decompilation";
        break;
    default:
        break;
    }
    return description;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/compiler_settings.h
+++ b/src/video_core/shader/compiler_settings.h
@ -1,26 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include "video_core/engines/shader_bytecode.h"
 namespace VideoCommon::Shader {
 /// Control flow handling strategies of the shader compiler; CompileDepthAsString() maps each
 /// value to a description.
 /// NOTE(review): the values look ordered from least to most structured output - confirm before
 /// relying on the ordering.
 enum class CompileDepth : u32 {
    BruteForce = 0,
    FlowStack = 1,
    NoFlowStack = 2,
    DecompileBackwards = 3,
    FullDecompile = 4,
 };
 std::string CompileDepthAsString(CompileDepth cd);
 /// Options of a shader compilation.
 struct CompilerSettings {
    /// Control flow handling strategy; NoFlowStack unless overridden
    CompileDepth depth{CompileDepth::NoFlowStack};
    /// When true (the default), no else branches are derived while enclosing if-then bodies
    bool disable_else_derivation{true};
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@ -1,751 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <list>
 #include <map>
 #include <set>
 #include <stack>
 #include <unordered_map>
 #include <vector>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/control_flow.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 namespace {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Placeholder target given to SYNC and BRK branches while parsing; TryQuery later replaces it
 // with the address on top of the SSY/PBK stack.
 constexpr s32 unassigned_branch = -2;
 /// Pending stack-analysis request: the block address to visit together with the SSY and PBK
 /// stacks accumulated on the path that reaches it.
 struct Query {
    u32 address = 0;
    std::stack<u32> ssy_stack;
    std::stack<u32> pbk_stack;
 };
 struct BlockStack {
    BlockStack() = default;
    explicit BlockStack(const Query& q) : ssy_stack{q.ssy_stack}, pbk_stack{q.pbk_stack} {}
    std::stack<u32> ssy_stack{};
    std::stack<u32> pbk_stack{};
 };
 /// Builds a branch of alternative type T from the forwarded arguments and wraps it in a shared
 /// BranchData variant.
 template <typename T, typename... Args>
 BlockBranchInfo MakeBranchInfo(Args&&... args) {
    static_assert(std::is_convertible_v<T, BranchData>);
    auto branch = T(std::forward<Args>(args)...);
    return std::make_shared<BranchData>(std::move(branch));
 }
 /// Returns the `ignore` flag of a SingleBranch; any other branch kind is never ignored.
 bool BlockBranchIsIgnored(BlockBranchInfo first) {
    const auto* const single = std::get_if<SingleBranch>(first.get());
    return single != nullptr && single->ignore;
 }
 /// A basic block found during CFG reconstruction: an inclusive [start, end] address range, a
 /// visited flag used by the stack queries, and the branch that terminates the block.
 struct BlockInfo {
    u32 start = 0;
    u32 end = 0;
    bool visited = false;
    BlockBranchInfo branch{};
    /// True when `address` lies within [start, end], both ends included.
    bool IsInside(const u32 address) const {
        const bool before_start = address < start;
        const bool past_end = end < address;
        return !(before_start || past_end);
    }
 };
 /// Working state shared by all the passes that rebuild a shader's control flow graph.
 struct CFGRebuildState {
    explicit CFGRebuildState(const ProgramCode& program_code_, u32 start_, Registry& registry_)
        : program_code{program_code_}, registry{registry_}, start{start_} {}
    // Shader code being analysed; held by reference, so it must outlive this state.
    const ProgramCode& program_code;
    // Queried by the BRX case of ParseCode (ObtainKey) to read branch table entries.
    Registry& registry;
    // Address where the analysis begins; also the lower bound for backwards tracking.
    u32 start{};
    // Every block created so far, in creation order.
    std::vector<BlockInfo> block_info;
    // Addresses still waiting to be inspected by TryInspectAddress.
    std::list<u32> inspect_queries;
    // Stack queries still waiting to be processed by TryQuery.
    std::list<Query> queries;
    // Block start address -> index into block_info (filled in by CreateBlockInfo).
    std::unordered_map<u32, u32> registered;
    // Addresses that are the target of some branch, SSY or PBK.
    std::set<u32> labels;
    // Address of an SSY instruction -> the target it names.
    std::map<u32, u32> ssy_labels;
    // Address of a PBK instruction -> the target it names.
    std::map<u32, u32> pbk_labels;
    // Query address -> SSY/PBK stacks recorded the first time that block was visited.
    std::unordered_map<u32, BlockStack> stacks;
    // Non-owning; assigned by ScanFlow just before DecompileShader runs.
    ASTManager* manager{};
 };
 // Result of looking an address up among the known blocks: no block contains it (None), it is
 // exactly the start of a block (Found), or it falls inside a block's range (Inside).
 enum class BlockCollision : u32 { None, Found, Inside };
 /// Searches the known blocks, in creation order, for the first one that starts at or contains
 /// `address`. Returns the collision kind and the block index, or {None, 0xFFFFFFFF} when no
 /// block matches.
 std::pair<BlockCollision, u32> TryGetBlock(CFGRebuildState& state, u32 address) {
    u32 position = 0;
    for (const BlockInfo& candidate : state.block_info) {
        if (candidate.start == address) {
            return {BlockCollision::Found, position};
        }
        if (candidate.IsInside(address)) {
            return {BlockCollision::Inside, position};
        }
        ++position;
    }
    return {BlockCollision::None, 0xFFFFFFFF};
 }
 /// Outcome of parsing one block: the branch that terminates it and the address of its last
 /// instruction.
 struct ParseInfo {
    BlockBranchInfo branch_info;
    u32 end_address = 0;
 };
 /// Appends a new block covering [start, end] and registers its index under its start address.
 /// The returned reference points into state.block_info, so it only stays valid until the next
 /// block is created.
 BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
    const u32 new_index = static_cast<u32>(state.block_info.size());
    BlockInfo& block = state.block_info.emplace_back();
    block.start = start;
    block.end = end;
    state.registered.insert({start, new_index});
    return block;
 }
 /// Converts a predicate index and negation flag into a Pred value; negated predicates are
 /// encoded as the index plus 8.
 Pred GetPredicate(u32 index, bool negated) {
    const u64 negation_offset = negated ? 8ULL : 0ULL;
    const u64 encoded = static_cast<u64>(index) + negation_offset;
    return static_cast<Pred>(encoded);
 }
 /// How ParseCode finished scanning a block.
 enum class ParseResult : u32 {
    // A flow instruction (EXIT, BRA, SYNC, BRK, KIL or BRX) terminated the block.
    ControlCaught,
    // The scan ran into an already registered block start or past the end of the code.
    BlockEnd,
    // The branch target could not be resolved; CFG reconstruction is abandoned.
    AbnormalFlow,
 };
 /// Everything needed to enumerate the targets of a BRX: the constant buffer index and offset
 /// of the branch table, the number of entries, and the relative position encoded in the BRX.
 struct BranchIndirectInfo {
    u32 buffer = 0;
    u32 offset = 0;
    u32 entries = 0;
    s32 relative_position = 0;
 };
 /// Constant buffer location (buffer index and offset) read by a tracked LD_C instruction.
 /// Members have no default initialisers; TrackLDC always sets both.
 struct BufferInfo {
    u32 index;
    u32 offset;
 };
 /// Decodes the instruction at `pos` and, when it is a BRX that does not go through a constant
 /// buffer, returns its branch extend value together with the register it jumps through.
 /// On success `pos` is moved one instruction back so the caller can keep tracking backwards.
 /// Returns std::nullopt (leaving `pos` untouched) when the instruction cannot be decoded, is
 /// not a BRX, or uses a constant buffer.
 std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
    const Instruction instr = state.program_code[pos];
    const auto opcode = OpCode::Decode(instr);
    // Decode yields an empty optional for unknown encodings; check before dereferencing, as
    // the other decoders in this file do.
    if (!opcode || opcode->get().GetId() != OpCode::Id::BRX) {
        return std::nullopt;
    }
    if (instr.brx.constant_buffer != 0) {
        return std::nullopt;
    }
    --pos;
    return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
 }
 template <typename Result, typename TestCallable, typename PackCallable>
 // requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
 // requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
 std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
                                       PackCallable pack) {
    for (; pos >= state.start; --pos) {
        if (IsSchedInstruction(pos, state.start)) {
            continue;
        }
        const Instruction instr = state.program_code[pos];
        const auto opcode = OpCode::Decode(instr);
        if (!opcode) {
            continue;
        }
        if (test(instr, opcode->get())) {
            --pos;
            return std::make_optional(pack(instr, opcode->get()));
        }
    }
    return std::nullopt;
 }
 std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
                                                   u64 brx_tracked_register) {
    return TrackInstruction<std::pair<BufferInfo, u64>>(
        state, pos,
        [brx_tracked_register](auto instr, const auto& opcode) {
            return opcode.GetId() == OpCode::Id::LD_C &&
                   instr.gpr0.Value() == brx_tracked_register &&
                   instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
        },
        [](auto instr, const auto& opcode) {
            const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
                                     static_cast<u32>(instr.cbuf36.GetOffset())};
            return std::make_pair(info, instr.gpr8.Value());
        });
 }
 /// Tracks backwards for the SHL_IMM that writes `ldc_tracked_register` and returns the
 /// register held in its gpr8 field.
 std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
                                     u64 ldc_tracked_register) {
    const auto is_shift = [ldc_tracked_register](auto instr, const auto& opcode) {
        if (opcode.GetId() != OpCode::Id::SHL_IMM) {
            return false;
        }
        return instr.gpr0.Value() == ldc_tracked_register;
    };
    const auto pack_shift = [](auto instr, const auto&) { return instr.gpr8.Value(); };
    return TrackInstruction<u64>(state, pos, is_shift, pack_shift);
 }
 /// Tracks backwards for the IMNMX_IMM that writes `shl_tracked_register` and returns its
 /// signed 20-bit immediate plus one, which the caller uses as the branch table entry count.
 std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
                                    u64 shl_tracked_register) {
    const auto is_clamp = [shl_tracked_register](auto instr, const auto& opcode) {
        if (opcode.GetId() != OpCode::Id::IMNMX_IMM) {
            return false;
        }
        return instr.gpr0.Value() == shl_tracked_register;
    };
    const auto pack_clamp = [](auto instr, const auto&) {
        return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
    };
    return TrackInstruction<u32>(state, pos, is_clamp, pack_clamp);
 }
 std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
    const auto brx_info = GetBRXInfo(state, pos);
    if (!brx_info) {
        return std::nullopt;
    }
    const auto [relative_position, brx_tracked_register] = *brx_info;
    const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
    if (!ldc_info) {
        return std::nullopt;
    }
    const auto [buffer_info, ldc_tracked_register] = *ldc_info;
    const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
    if (!shl_tracked_register) {
        return std::nullopt;
    }
    const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
    if (!entries) {
        return std::nullopt;
    }
    return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
 }
 /// Scans forward from `address` until something terminates the basic block that starts there.
 ///
 /// Returns, together with the block's last address and terminating branch:
 ///  - ControlCaught when an executable EXIT, BRA, SYNC, BRK, KIL or BRX ends the block;
 ///  - BlockEnd when the scan reaches an already registered block start (an ignored
 ///    fall-through branch to it is produced) or runs past the end of the code;
 ///  - AbnormalFlow when a branch cannot be resolved (BRA through a constant buffer, or a BRX
 ///    whose table cannot be tracked or read).
 ///
 /// Side effects on `state`: branch/SSY/PBK targets are added to `labels` (and queued in
 /// `inspect_queries` the first time they are seen), and SSY/PBK instructions are recorded in
 /// `ssy_labels`/`pbk_labels`.
 std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
    u32 offset = static_cast<u32>(address);
    const u32 end_address = static_cast<u32>(state.program_code.size());
    ParseInfo parse_info{};
    // Scratch branch reused by every case below.
    // NOTE(review): flow instructions that are skipped (never-executed predicate or F condition
    // code) still write into single_branch.condition before `continue`, so the BlockEnd path at
    // the bottom may see a condition left over from such an instruction - confirm intended.
    SingleBranch single_branch{};
    // Registers a label and schedules its address for inspection the first time it is seen.
    const auto insert_label = [](CFGRebuildState& rebuild_state, u32 label_address) {
        const auto pair = rebuild_state.labels.emplace(label_address);
        if (pair.second) {
            rebuild_state.inspect_queries.push_back(label_address);
        }
    };
    while (true) {
        if (offset >= end_address) {
            // ASSERT_OR_EXECUTE can't be used, as it ignores the break
            ASSERT_MSG(false, "Shader passed the current limit!");
            single_branch.address = exit_branch;
            single_branch.ignore = false;
            break;
        }
        // Reaching the start of an existing block ends this one with an ignored fall-through.
        if (state.registered.contains(offset)) {
            single_branch.address = offset;
            single_branch.ignore = true;
            break;
        }
        if (IsSchedInstruction(offset, state.start)) {
            offset++;
            continue;
        }
        const Instruction instr = {state.program_code[offset]};
        const auto opcode = OpCode::Decode(instr);
        // Only flow instructions can terminate a block; everything else is skipped.
        if (!opcode || opcode->get().GetType() != OpCode::Type::Flow) {
            offset++;
            continue;
        }
        switch (opcode->get().GetId()) {
        case OpCode::Id::EXIT: {
            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
            // An instruction that can never execute does not end the block.
            if (single_branch.condition.predicate == Pred::NeverExecute) {
                offset++;
                continue;
            }
            const ConditionCode cc = instr.flow_condition_code;
            single_branch.condition.cc = cc;
            // Condition code F makes the instruction a no-op as well.
            if (cc == ConditionCode::F) {
                offset++;
                continue;
            }
            single_branch.address = exit_branch;
            single_branch.kill = false;
            single_branch.is_sync = false;
            single_branch.is_brk = false;
            single_branch.ignore = false;
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
                single_branch.condition, single_branch.address, single_branch.kill,
                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
            return {ParseResult::ControlCaught, parse_info};
        }
        case OpCode::Id::BRA: {
            // Branches through a constant buffer cannot be resolved here.
            if (instr.bra.constant_buffer != 0) {
                return {ParseResult::AbnormalFlow, parse_info};
            }
            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
            if (single_branch.condition.predicate == Pred::NeverExecute) {
                offset++;
                continue;
            }
            const ConditionCode cc = instr.flow_condition_code;
            single_branch.condition.cc = cc;
            if (cc == ConditionCode::F) {
                offset++;
                continue;
            }
            const u32 branch_offset = offset + instr.bra.GetBranchTarget();
            // A branch to address 0 is treated as an exit.
            if (branch_offset == 0) {
                single_branch.address = exit_branch;
            } else {
                single_branch.address = branch_offset;
            }
            insert_label(state, branch_offset);
            single_branch.kill = false;
            single_branch.is_sync = false;
            single_branch.is_brk = false;
            single_branch.ignore = false;
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
                single_branch.condition, single_branch.address, single_branch.kill,
                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
            return {ParseResult::ControlCaught, parse_info};
        }
        case OpCode::Id::SYNC: {
            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
            if (single_branch.condition.predicate == Pred::NeverExecute) {
                offset++;
                continue;
            }
            const ConditionCode cc = instr.flow_condition_code;
            single_branch.condition.cc = cc;
            if (cc == ConditionCode::F) {
                offset++;
                continue;
            }
            // The real target is only known once the SSY stack is resolved in TryQuery.
            single_branch.address = unassigned_branch;
            single_branch.kill = false;
            single_branch.is_sync = true;
            single_branch.is_brk = false;
            single_branch.ignore = false;
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
                single_branch.condition, single_branch.address, single_branch.kill,
                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
            return {ParseResult::ControlCaught, parse_info};
        }
        case OpCode::Id::BRK: {
            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
            if (single_branch.condition.predicate == Pred::NeverExecute) {
                offset++;
                continue;
            }
            const ConditionCode cc = instr.flow_condition_code;
            single_branch.condition.cc = cc;
            if (cc == ConditionCode::F) {
                offset++;
                continue;
            }
            // The real target is only known once the PBK stack is resolved in TryQuery.
            single_branch.address = unassigned_branch;
            single_branch.kill = false;
            single_branch.is_sync = false;
            single_branch.is_brk = true;
            single_branch.ignore = false;
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
                single_branch.condition, single_branch.address, single_branch.kill,
                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
            return {ParseResult::ControlCaught, parse_info};
        }
        case OpCode::Id::KIL: {
            const auto pred_index = static_cast<u32>(instr.pred.pred_index);
            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
            if (single_branch.condition.predicate == Pred::NeverExecute) {
                offset++;
                continue;
            }
            const ConditionCode cc = instr.flow_condition_code;
            single_branch.condition.cc = cc;
            if (cc == ConditionCode::F) {
                offset++;
                continue;
            }
            // Same as EXIT, but flagged as a kill.
            single_branch.address = exit_branch;
            single_branch.kill = true;
            single_branch.is_sync = false;
            single_branch.is_brk = false;
            single_branch.ignore = false;
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
                single_branch.condition, single_branch.address, single_branch.kill,
                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
            return {ParseResult::ControlCaught, parse_info};
        }
        case OpCode::Id::SSY: {
            // SSY does not end the block; it only records the target a later SYNC jumps to.
            const u32 target = offset + instr.bra.GetBranchTarget();
            insert_label(state, target);
            state.ssy_labels.emplace(offset, target);
            break;
        }
        case OpCode::Id::PBK: {
            // PBK does not end the block; it only records the target a later BRK jumps to.
            const u32 target = offset + instr.bra.GetBranchTarget();
            insert_label(state, target);
            state.pbk_labels.emplace(offset, target);
            break;
        }
        case OpCode::Id::BRX: {
            // Indirect branch: recover the branch table and emit one case per entry.
            const auto tmp = TrackBranchIndirectInfo(state, offset);
            if (!tmp) {
                LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
                return {ParseResult::AbnormalFlow, parse_info};
            }
            const auto result = *tmp;
            const s32 pc_target = offset + result.relative_position;
            std::vector<CaseBranch> branches;
            for (u32 i = 0; i < result.entries; i++) {
                // Table entries are read 4 bytes apart from the tracked constant buffer.
                auto key = state.registry.ObtainKey(result.buffer, result.offset + i * 4);
                if (!key) {
                    return {ParseResult::AbnormalFlow, parse_info};
                }
                u32 value = *key;
                // The entry is shifted right by 3 before being added to the base target.
                u32 target = static_cast<u32>((value >> 3) + pc_target);
                insert_label(state, target);
                branches.emplace_back(value, target);
            }
            parse_info.end_address = offset;
            parse_info.branch_info = MakeBranchInfo<MultiBranch>(
                static_cast<u32>(instr.gpr8.Value()), std::move(branches));
            return {ParseResult::ControlCaught, parse_info};
        }
        default:
            break;
        }
        offset++;
    }
    // Reached only through the two `break`s at the top of the loop; `offset` is one past the
    // last instruction that belongs to this block.
    single_branch.kill = false;
    single_branch.is_sync = false;
    single_branch.is_brk = false;
    parse_info.end_address = offset - 1;
    parse_info.branch_info = MakeBranchInfo<SingleBranch>(
        single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
        single_branch.is_brk, single_branch.ignore);
    return {ParseResult::BlockEnd, parse_info};
 }
 /// Pops one address from the inspection queue and makes sure a block starts there.
 /// Returns false when the queue is empty or the code at that address has abnormal flow,
 /// which ends the CFG reconstruction; returns true otherwise.
 bool TryInspectAddress(CFGRebuildState& state) {
    if (state.inspect_queries.empty()) {
        return false;
    }
    const u32 address = state.inspect_queries.front();
    state.inspect_queries.pop_front();

    const auto collision = TryGetBlock(state, address);
    if (collision.first == BlockCollision::Found) {
        // A block already starts here; nothing to do.
        return true;
    }
    if (collision.first == BlockCollision::Inside) {
        // The address falls in the middle of a known block: split that block in two. The new
        // tail inherits the original branch and the head falls through into the tail.
        const u32 block_index = collision.second;
        const u32 original_end = state.block_info[block_index].end;
        BlockInfo& tail = CreateBlockInfo(state, address, original_end);
        // Fetched after the insertion above, which may have reallocated the vector.
        BlockInfo& head = state.block_info[block_index];
        head.end = address - 1;
        tail.branch = std::move(head.branch);
        BlockBranchInfo fallthrough = MakeBranchInfo<SingleBranch>();
        auto* const fallthrough_branch = std::get_if<SingleBranch>(fallthrough.get());
        fallthrough_branch->address = address;
        fallthrough_branch->ignore = true;
        head.branch = std::move(fallthrough);
        return true;
    }

    const auto parsed = ParseCode(state, address);
    if (parsed.first == ParseResult::AbnormalFlow) {
        // Abnormal flow ends the CFG reconstruction.
        return false;
    }
    const ParseInfo& parse_info = parsed.second;
    BlockInfo& created = CreateBlockInfo(state, address, parse_info.end_address);
    created.branch = parse_info.branch_info;
    // A conditional single branch may fall through, so the next address needs inspecting too.
    const auto* const single = std::get_if<SingleBranch>(created.branch.get());
    if (single != nullptr && !single->condition.IsUnconditional()) {
        state.inspect_queries.push_front(parse_info.end_address + 1);
    }
    return true;
 }
 /// Processes one pending stack query.
 ///
 /// The first visit to a block records the SSY/PBK stacks it was reached with, pushes the
 /// targets of every SSY/PBK inside the block, resolves SYNC/BRK branches against the top of
 /// the matching stack and schedules queries for the block's successors. Later visits only
 /// check that the incoming stacks agree with the recorded ones.
 ///
 /// Returns false when there is nothing to process, when a revisit arrives with mismatching
 /// stacks, or when a SYNC/BRK is reached with an empty stack; the caller then gives up on
 /// removing the flow stack.
 bool TryQuery(CFGRebuildState& state) {
    // Pushes the targets of all labels located inside `block` onto `cc`, in address order.
    const auto gather_labels = [](std::stack<u32>& cc, std::map<u32, u32>& labels,
                                  BlockInfo& block) {
        auto gather_start = labels.lower_bound(block.start);
        const auto gather_end = labels.upper_bound(block.end);
        while (gather_start != gather_end) {
            cc.push(gather_start->second);
            ++gather_start;
        }
    };
    if (state.queries.empty()) {
        return false;
    }
    Query& q = state.queries.front();
    // NOTE(review): operator[] inserts index 0 for an address that was never registered (for
    // example the exit_branch sentinel converted to u32), so such queries resolve to block 0 -
    // confirm this is intended.
    const u32 block_index = state.registered[q.address];
    BlockInfo& block = state.block_info[block_index];
    // If the block is visited, check if the stacks match, else gather the ssy/pbk
    // labels into the current stack and look if the branch at the end of the block
    // consumes a label. Schedule new queries accordingly
    if (block.visited) {
        BlockStack& stack = state.stacks[q.address];
        const bool all_okay = (stack.ssy_stack.empty() || q.ssy_stack == stack.ssy_stack) &&
                              (stack.pbk_stack.empty() || q.pbk_stack == stack.pbk_stack);
        state.queries.pop_front();
        return all_okay;
    }
    block.visited = true;
    state.stacks.insert_or_assign(q.address, BlockStack{q});
    // Copy the query before popping it: `q` refers to the list's front element.
    Query q2(q);
    state.queries.pop_front();
    gather_labels(q2.ssy_stack, state.ssy_labels, block);
    gather_labels(q2.pbk_stack, state.pbk_labels, block);
    if (std::holds_alternative<SingleBranch>(*block.branch)) {
        auto* branch = std::get_if<SingleBranch>(block.branch.get());
        // A SYNC/BRK consumes the top of its stack. Without a matching SSY/PBK on this path
        // the stack is empty and top()/pop() would be undefined behaviour, so report failure
        // before any query is scheduled.
        if ((branch->is_sync && q2.ssy_stack.empty()) ||
            (branch->is_brk && q2.pbk_stack.empty())) {
            return false;
        }
        if (!branch->condition.IsUnconditional()) {
            // Conditional branches may fall through to the next block.
            q2.address = block.end + 1;
            state.queries.push_back(q2);
        }
        auto& conditional_query = state.queries.emplace_back(q2);
        if (branch->is_sync) {
            if (branch->address == unassigned_branch) {
                branch->address = conditional_query.ssy_stack.top();
            }
            conditional_query.ssy_stack.pop();
        }
        if (branch->is_brk) {
            if (branch->address == unassigned_branch) {
                branch->address = conditional_query.pbk_stack.top();
            }
            conditional_query.pbk_stack.pop();
        }
        conditional_query.address = branch->address;
        return true;
    }
    // Indirect branch: every case target is queried with the same stacks.
    const auto* multi_branch = std::get_if<MultiBranch>(block.branch.get());
    for (const auto& branch_case : multi_branch->branches) {
        auto& conditional_query = state.queries.emplace_back(q2);
        conditional_query.address = branch_case.address;
    }
    return true;
 }
 /// Emits the AST statements for a block's terminating branch: a return for single branches
 /// with a negative address (flagged as a kill when requested), a goto for any other single
 /// branch, and one goto per case for a multi branch.
 void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
    // Builds the expression guarding a branch: the predicate (negated for encoded values above
    // 7) and-ed with the condition code when both are present, or constant true when neither is.
    const auto get_expr = [](const Condition& cond) -> Expr {
        Expr cc_expr;
        if (cond.cc != ConditionCode::T) {
            cc_expr = MakeExpr<ExprCondCode>(cond.cc);
        }
        if (cond.predicate == Pred::UnusedIndex) {
            if (cc_expr) {
                return cc_expr;
            }
            return MakeExpr<ExprBoolean>(true);
        }
        u32 pred = static_cast<u32>(cond.predicate);
        const bool negate = pred > 7;
        if (negate) {
            pred -= 8;
        }
        Expr pred_expr = MakeExpr<ExprPredicate>(pred);
        if (negate) {
            pred_expr = MakeExpr<ExprNot>(std::move(pred_expr));
        }
        if (cc_expr) {
            return MakeExpr<ExprAnd>(std::move(pred_expr), std::move(cc_expr));
        }
        return pred_expr;
    };
    if (const auto* branch = std::get_if<SingleBranch>(branch_info.get())) {
        if (branch->address >= 0) {
            mm.InsertGoto(get_expr(branch->condition), branch->address);
            return;
        }
        // Negative addresses leave the shader.
        if (branch->kill) {
            mm.InsertReturn(get_expr(branch->condition), true);
        } else {
            mm.InsertReturn(get_expr(branch->condition), false);
        }
        return;
    }
    const auto* cases = std::get_if<MultiBranch>(branch_info.get());
    for (const auto& entry : cases->branches) {
        mm.InsertGoto(MakeExpr<ExprGprEqual>(cases->gpr, entry.cmp_value), entry.address);
    }
 }
 /// Feeds labels, blocks and branches into the AST manager in block order and then runs its
 /// decompilation. Blocks ending in an ignored branch are inserted with their end extended by
 /// one and without a branch statement.
 void DecompileShader(CFGRebuildState& state) {
    ASTManager& manager = *state.manager;
    manager.Init();
    for (u32 label : state.labels) {
        manager.DeclareLabel(label);
    }
    for (const BlockInfo& block : state.block_info) {
        if (state.labels.contains(block.start)) {
            manager.InsertLabel(block.start);
        }
        if (BlockBranchIsIgnored(block.branch)) {
            manager.InsertBlock(block.start, block.end + 1);
            continue;
        }
        manager.InsertBlock(block.start, block.end);
        InsertBranch(manager, block.branch);
    }
    manager.Decompile();
 }
 } // Anonymous namespace
 /// Analyses the control flow of a shader starting at `start_address` and reports how far the
 /// analysis got through the `settings.depth` field of the returned characteristics:
 ///  - BruteForce: no analysis was requested, or block discovery hit abnormal flow;
 ///  - FlowStack: blocks were found but the SSY/PBK stacks could not be resolved statically
 ///    (or resolving them was not requested);
 ///  - NoFlowStack: stacks were resolved; blocks and labels are returned;
 ///  - the requested depth: the AST manager fully decompiled the shader.
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
                                                 const CompilerSettings& settings,
                                                 Registry& registry) {
    auto result_out = std::make_unique<ShaderCharacteristics>();
    if (settings.depth == CompileDepth::BruteForce) {
        result_out->settings.depth = CompileDepth::BruteForce;
        return result_out;
    }
    CFGRebuildState state{program_code, start_address, registry};
    // Inspect Code and generate blocks
    state.labels.clear();
    state.labels.emplace(start_address);
    state.inspect_queries.push_back(state.start);
    while (!state.inspect_queries.empty()) {
        if (!TryInspectAddress(state)) {
            // Abnormal flow: fall back to brute force.
            result_out->settings.depth = CompileDepth::BruteForce;
            return result_out;
        }
    }
    bool use_flow_stack = true;
    bool decompiled = false;
    if (settings.depth != CompileDepth::FlowStack) {
        // Decompile Stacks
        state.queries.push_back(Query{state.start, {}, {}});
        decompiled = true;
        while (!state.queries.empty()) {
            if (!TryQuery(state)) {
                decompiled = false;
                break;
            }
        }
    }
    use_flow_stack = !decompiled;
    // Sort and organize results
    // Note: after sorting, the indices stored in state.registered no longer match block_info.
    std::sort(state.block_info.begin(), state.block_info.end(),
              [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
    if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
        // `manager` lives on this stack frame; state.manager must not be used after this scope.
        ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
                           settings.disable_else_derivation};
        state.manager = &manager;
        DecompileShader(state);
        decompiled = state.manager->IsFullyDecompiled();
        if (!decompiled) {
            if (settings.depth == CompileDepth::FullDecompile) {
                LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
            } else {
                LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
            }
            state.manager->ShowCurrentState("Of Shader");
            state.manager->Clear();
        } else {
            // Full success: hand the AST over in a fresh result object.
            auto characteristics = std::make_unique<ShaderCharacteristics>();
            characteristics->start = start_address;
            characteristics->settings.depth = settings.depth;
            characteristics->manager = std::move(manager);
            characteristics->end = state.block_info.back().end + 1;
            return characteristics;
        }
    }
    // Block-based result, with or without a flow stack.
    result_out->start = start_address;
    result_out->settings.depth =
        use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
    result_out->blocks.clear();
    for (auto& block : state.block_info) {
        ShaderBlock new_block{};
        new_block.start = block.start;
        new_block.end = block.end;
        new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
        if (!new_block.ignore_branch) {
            new_block.branch = block.branch;
        }
        result_out->end = std::max(result_out->end, block.end);
        result_out->blocks.push_back(new_block);
    }
    if (!use_flow_stack) {
        result_out->labels = std::move(state.labels);
        return result_out;
    }
    // Flow stack mode: merge each block into its predecessor when it starts right after it and
    // is not a label.
    auto back = result_out->blocks.begin();
    auto next = std::next(back);
    while (next != result_out->blocks.end()) {
        if (!state.labels.contains(next->start) && next->start == back->end + 1) {
            back->end = next->end;
            next = result_out->blocks.erase(next);
            continue;
        }
        back = next;
        ++next;
    }
    return result_out;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@ -1,117 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <list>
 #include <optional>
 #include <set>
 #include <variant>
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Pred;
 // Sentinel branch address meaning "leave the shader"; it is the default address of a
 // SingleBranch.
 constexpr s32 exit_branch = -1;
 /// Predicate and condition code guarding a branch.
 struct Condition {
    Pred predicate{Pred::UnusedIndex};
    ConditionCode cc{ConditionCode::T};
    /// True when neither a predicate nor a condition code restricts the branch.
    bool IsUnconditional() const {
        if (predicate != Pred::UnusedIndex) {
            return false;
        }
        return cc == ConditionCode::T;
    }
    bool operator==(const Condition& other) const {
        return predicate == other.predicate && cc == other.cc;
    }
    bool operator!=(const Condition& other) const {
        return !(*this == other);
    }
 };
 /// A branch with a single target: its guarding condition, the target address (negative values
 /// such as exit_branch are sentinels) and flags describing the kind of branch.
 class SingleBranch {
 public:
    SingleBranch() = default;
    explicit SingleBranch(Condition condition_, s32 address_, bool kill_, bool is_sync_,
                          bool is_brk_, bool ignore_)
        : condition{condition_}, address{address_}, kill{kill_}, is_sync{is_sync_}, is_brk{is_brk_},
          ignore{ignore_} {}
    /// Member-wise equality over every field.
    bool operator==(const SingleBranch& b) const {
        return condition == b.condition && address == b.address && kill == b.kill &&
               is_sync == b.is_sync && is_brk == b.is_brk && ignore == b.ignore;
    }
    bool operator!=(const SingleBranch& b) const {
        return !(*this == b);
    }
    Condition condition{};
    s32 address{exit_branch};
    bool kill{};
    bool is_sync{};
    bool is_brk{};
    bool ignore{};
 };
 /// One case of a multi-way branch: the value compared against and the address jumped to.
 struct CaseBranch {
    u32 cmp_value;
    u32 address;
    explicit CaseBranch(u32 cmp_value_, u32 address_) : cmp_value(cmp_value_), address(address_) {}
 };
 /// A multi-way branch: the register selecting the case and the list of possible cases.
 class MultiBranch {
 public:
    u32 gpr{};
    std::vector<CaseBranch> branches{};
    explicit MultiBranch(u32 gpr_, std::vector<CaseBranch>&& branches_)
        : gpr(gpr_), branches(std::move(branches_)) {}
 };
 // A block's terminating branch is either a single-target or a multi-way branch.
 using BranchData = std::variant<SingleBranch, MultiBranch>;
 // Shared handle to a branch; it may be null, e.g. ScanFlow leaves it unset on blocks whose
 // branch is ignored.
 using BlockBranchInfo = std::shared_ptr<BranchData>;
 /// Compares two branch handles; used by ShaderBlock::operator==.
 bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
 /// A basic block as exposed to users of the control flow analysis: its address range, whether
 /// its terminating branch should be ignored, and that branch.
 struct ShaderBlock {
    u32 start{};
    u32 end{};
    bool ignore_branch{};
    BlockBranchInfo branch{};
    /// Equal when range and ignore flag match and the branches compare equal.
    bool operator==(const ShaderBlock& sb) const {
        const bool same_header =
            start == sb.start && end == sb.end && ignore_branch == sb.ignore_branch;
        return same_header && BlockBranchInfoAreEqual(branch, sb.branch);
    }
    bool operator!=(const ShaderBlock& sb) const {
        return !(*this == sb);
    }
 };
 /// Result of ScanFlow. Which members are meaningful depends on the depth that was reached
 /// (see `settings.depth`).
 struct ShaderCharacteristics {
    // Basic blocks of the shader.
    std::list<ShaderBlock> blocks{};
    // Label addresses.
    std::set<u32> labels{};
    // Address the analysis started from.
    u32 start{};
    // End address of the analysed code.
    u32 end{};
    // AST of the shader.
    ASTManager manager{true, true};
    // Settings describing the depth that was actually achieved.
    CompilerSettings settings{};
 };
 /// Analyses the control flow of `program_code` starting at `start_address`, going as deep as
 /// `settings` allows, and returns the resulting characteristics. `registry` is passed along to
 /// the analysis.
 std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
                                                 const CompilerSettings& settings,
                                                 Registry& registry);
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@ -1,368 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <cstring>
 #include <limits>
 #include <set>
 #include <fmt/format.h>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/control_flow.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 namespace {
 /// Hands the offsets of all non-bindless samplers to the driver profile so it can deduce the
 /// texture handler size. Does nothing when the size is already known or when fewer than two
 /// non-bindless samplers are in use.
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                              const std::list<SamplerEntry>& used_samplers) {
    if (gpu_driver.IsTextureHandlerSizeKnown()) {
        return;
    }
    if (used_samplers.size() < 2) {
        return;
    }
    // Gather the offsets of the bound (non-bindless) samplers only.
    std::vector<u32> offsets;
    for (const auto& entry : used_samplers) {
        if (!entry.is_bindless) {
            offsets.emplace_back(entry.offset);
        }
    }
    // A single offset gives nothing to measure a distance against.
    if (offsets.size() < 2) {
        return;
    }
    gpu_driver.DeduceTextureHandlerSize(std::move(offsets));
 }
 std::optional<u32> TryDeduceSamplerSize(const SamplerEntry& sampler_to_deduce,
                                        VideoCore::GuestDriverProfile& gpu_driver,
                                        const std::list<SamplerEntry>& used_samplers) {
    const u32 base_offset = sampler_to_deduce.offset;
    u32 max_offset{std::numeric_limits<u32>::max()};
    for (const auto& sampler : used_samplers) {
        if (sampler.is_bindless) {
            continue;
        }
        if (sampler.offset > base_offset) {
            max_offset = std::min(sampler.offset, max_offset);
        }
    }
    if (max_offset == std::numeric_limits<u32>::max()) {
        return std::nullopt;
    }
    return ((max_offset - base_offset) * 4) / gpu_driver.GetTextureHandlerSize();
 }
 } // Anonymous namespace
 class ASTDecoder {
 public:
    explicit ASTDecoder(ShaderIR& ir_) : ir(ir_) {}
    void operator()(ASTProgram& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(ASTIfThen& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(ASTIfElse& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(ASTBlockEncoded& ast) {}
    void operator()(ASTBlockDecoded& ast) {}
    void operator()(ASTVarSet& ast) {}
    void operator()(ASTLabel& ast) {}
    void operator()(ASTGoto& ast) {}
    void operator()(ASTDoWhile& ast) {
        ASTNode current = ast.nodes.GetFirst();
        while (current) {
            Visit(current);
            current = current->GetNext();
        }
    }
    void operator()(ASTReturn& ast) {}
    void operator()(ASTBreak& ast) {}
    void Visit(ASTNode& node) {
        std::visit(*this, *node->GetInnerData());
        if (node->IsBlockEncoded()) {
            auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
            NodeBlock bb = ir.DecodeRange(block->start, block->end);
            node->TransformBlockEncoded(std::move(bb));
        }
    }
 private:
    ShaderIR& ir;
 };
 /// Decodes the whole shader program into basic blocks (or a decoded AST), choosing the strategy
 /// from the compile depth reported by the control flow scan.
 void ShaderIR::Decode() {
    // The shader header is copied from the beginning of the program code.
    std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
    decompiled = false;
    auto info = ScanFlow(program_code, main_offset, settings, registry);
    auto& shader_info = *info;
    coverage_begin = shader_info.start;
    coverage_end = shader_info.end;
    // The depth used is the one reported by the scan, which may differ from the requested one
    // (a warning is logged at the end of this function in that case).
    switch (shader_info.settings.depth) {
    case CompileDepth::FlowStack: {
        // One basic block per scanned block, keyed by its start address.
        for (const auto& block : shader_info.blocks) {
            basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
        }
        break;
    }
    case CompileDepth::NoFlowStack: {
        disable_flow_stack = true;
        // Stores the accumulated nodes under `label`; the exit_branch sentinel is never stored.
        const auto insert_block = [this](NodeBlock& nodes, u32 label) {
            if (label == static_cast<u32>(exit_branch)) {
                return;
            }
            basic_blocks.insert({label, nodes});
        };
        const auto& blocks = shader_info.blocks;
        NodeBlock current_block;
        // Starts as the sentinel so nothing is inserted before the first label is seen.
        u32 current_label = static_cast<u32>(exit_branch);
        for (const auto& block : blocks) {
            // A block starting at a label closes the accumulated block and opens a new one.
            if (shader_info.labels.contains(block.start)) {
                insert_block(current_block, current_label);
                current_block.clear();
                current_label = block.start;
            }
            if (!block.ignore_branch) {
                // Decode up to `end` and emit the block's branch as explicit control flow.
                DecodeRangeInner(current_block, block.start, block.end);
                InsertControlFlow(current_block, block);
            } else {
                // Branch is ignored: decode one instruction further and emit no control flow.
                DecodeRangeInner(current_block, block.start, block.end + 1);
            }
        }
        // Flush the last accumulated block.
        insert_block(current_block, current_label);
        break;
    }
    case CompileDepth::DecompileBackwards:
    case CompileDepth::FullDecompile: {
        // Take over the AST built by the scan and decode its encoded blocks in place.
        program_manager = std::move(shader_info.manager);
        disable_flow_stack = true;
        decompiled = true;
        ASTDecoder decoder{*this};
        ASTNode program = GetASTProgram();
        decoder.Visit(program);
        break;
    }
    default:
        // Unknown depths are reported and then handled like BruteForce.
        LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
        [[fallthrough]];
    case CompileDepth::BruteForce: {
        // Every instruction from main_offset to the end of the code becomes its own block.
        const auto shader_end = static_cast<u32>(program_code.size());
        coverage_begin = main_offset;
        coverage_end = shader_end;
        for (u32 label = main_offset; label < shader_end; ++label) {
            basic_blocks.insert({label, DecodeRange(label, label + 1)});
        }
        break;
    }
    }
    if (settings.depth != shader_info.settings.depth) {
        LOG_WARNING(
            HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
            CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
    }
 }
 /// Decodes the instructions in [begin, end) into a fresh node block and returns it.
 /// See DecodeRangeInner for how `begin > end` is treated.
 NodeBlock ShaderIR::DecodeRange(u32 begin, u32 end) {
    NodeBlock decoded;
    DecodeRangeInner(decoded, begin, end);
    return decoded;
 }
 /// Appends the decoded instructions of [begin, end) to `bb`.
 /// When `begin` is greater than `end`, decoding runs up to MAX_PROGRAM_LENGTH instead.
 void ShaderIR::DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end) {
    // The bound does not depend on the loop, so compute it once.
    const auto limit = begin > end ? MAX_PROGRAM_LENGTH : end;
    u32 pc = begin;
    while (pc < limit) {
        // DecodeInstr returns the address of the next instruction to decode.
        pc = DecodeInstr(bb, pc);
    }
 }
 /// Appends the control flow operations described by `block.branch` to both `bb` and
 /// global_code: a (possibly conditional) discard, exit or branch for a single branch, or one
 /// conditional branch per case for a multi-way branch.
 void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
    // Wraps `n` in the conditionals required by the condition code and predicate of `cond`.
    const auto apply_conditions = [&](const Condition& cond, Node n) -> Node {
        Node wrapped = n;
        if (cond.cc != ConditionCode::T) {
            wrapped = Conditional(GetConditionCode(cond.cc), {wrapped});
        }
        if (cond.predicate != Pred::UnusedIndex) {
            // Predicate values above 7 are treated as the negated form of (value - 8).
            const u32 raw = static_cast<u32>(cond.predicate);
            const bool negated = raw > 7;
            const u32 index = negated ? raw - 8 : raw;
            wrapped = Conditional(GetPredicate(index, negated), {wrapped});
        }
        return wrapped;
    };
    // Every generated node goes to the local block first and then to the global code list.
    const auto emit = [&](const auto& node) {
        bb.push_back(node);
        global_code.push_back(node);
    };
    if (const auto* single = std::get_if<SingleBranch>(block.branch.get())) {
        // Negative addresses mean there is no branch target: discard on kill, exit otherwise.
        const auto make_operation = [&]() -> Node {
            if (single->address >= 0) {
                return Operation(OperationCode::Branch, Immediate(single->address));
            }
            if (single->kill) {
                return Operation(OperationCode::Discard);
            }
            return Operation(OperationCode::Exit);
        };
        Node n = make_operation();
        n = apply_conditions(single->condition, n);
        emit(n);
        return;
    }
    const auto* multi = std::get_if<MultiBranch>(block.branch.get());
    Node selector = GetRegister(multi->gpr);
    for (const auto& entry : multi->branches) {
        // Branch to the case address when the selector register equals the case value.
        Node target = Operation(OperationCode::Branch, Immediate(entry.address));
        Node expected = Immediate(entry.cmp_value);
        Node is_match = GetPredicateComparisonInteger(Tegra::Shader::PredCondition::EQ, false,
                                                      selector, expected);
        auto guarded = Conditional(is_match, {target});
        emit(guarded);
    }
 }
 /// Decodes the instruction at `pc`, appending the generated nodes to `bb` and global_code.
 /// @return Address of the next instruction to decode.
 u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
    // Sched instructions produce no code; step over them.
    if (IsSchedInstruction(pc, main_offset)) {
        return pc + 1;
    }
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const u32 nv_address = ConvertAddressToNvidiaSpace(pc);
    if (!opcode) {
        // Decoding failed: leave a comment node in the block and continue with the next word.
        UNIMPLEMENTED_MSG("Unhandled instruction: {0:x}", instr.value);
        bb.push_back(Comment(fmt::format("{:05x} Unimplemented Shader instruction (0x{:016x})",
                                         nv_address, instr.value)));
        return pc + 1;
    }
    // Annotate the block with the address, name and raw encoding of the instruction.
    bb.push_back(Comment(
        fmt::format("{:05x} {} (0x{:016x})", nv_address, opcode->get().GetName(), instr.value)));
    using Tegra::Shader::Pred;
    UNIMPLEMENTED_IF_MSG(instr.pred.full_pred == Pred::NeverExecute,
                         "NeverExecute predicate not implemented");
    // Opcode type -> member function decoding that type; other types go through DecodeOther.
    using DecodeFunction = u32 (ShaderIR::*)(NodeBlock&, u32);
    static const std::map<OpCode::Type, DecodeFunction> decoders = {
        {OpCode::Type::Arithmetic, &ShaderIR::DecodeArithmetic},
        {OpCode::Type::ArithmeticImmediate, &ShaderIR::DecodeArithmeticImmediate},
        {OpCode::Type::Bfe, &ShaderIR::DecodeBfe},
        {OpCode::Type::Bfi, &ShaderIR::DecodeBfi},
        {OpCode::Type::Shift, &ShaderIR::DecodeShift},
        {OpCode::Type::ArithmeticInteger, &ShaderIR::DecodeArithmeticInteger},
        {OpCode::Type::ArithmeticIntegerImmediate, &ShaderIR::DecodeArithmeticIntegerImmediate},
        {OpCode::Type::ArithmeticHalf, &ShaderIR::DecodeArithmeticHalf},
        {OpCode::Type::ArithmeticHalfImmediate, &ShaderIR::DecodeArithmeticHalfImmediate},
        {OpCode::Type::Ffma, &ShaderIR::DecodeFfma},
        {OpCode::Type::Hfma2, &ShaderIR::DecodeHfma2},
        {OpCode::Type::Conversion, &ShaderIR::DecodeConversion},
        {OpCode::Type::Warp, &ShaderIR::DecodeWarp},
        {OpCode::Type::Memory, &ShaderIR::DecodeMemory},
        {OpCode::Type::Texture, &ShaderIR::DecodeTexture},
        {OpCode::Type::Image, &ShaderIR::DecodeImage},
        {OpCode::Type::FloatSetPredicate, &ShaderIR::DecodeFloatSetPredicate},
        {OpCode::Type::IntegerSetPredicate, &ShaderIR::DecodeIntegerSetPredicate},
        {OpCode::Type::HalfSetPredicate, &ShaderIR::DecodeHalfSetPredicate},
        {OpCode::Type::PredicateSetRegister, &ShaderIR::DecodePredicateSetRegister},
        {OpCode::Type::PredicateSetPredicate, &ShaderIR::DecodePredicateSetPredicate},
        {OpCode::Type::RegisterSetPredicate, &ShaderIR::DecodeRegisterSetPredicate},
        {OpCode::Type::FloatSet, &ShaderIR::DecodeFloatSet},
        {OpCode::Type::IntegerSet, &ShaderIR::DecodeIntegerSet},
        {OpCode::Type::HalfSet, &ShaderIR::DecodeHalfSet},
        {OpCode::Type::Video, &ShaderIR::DecodeVideo},
        {OpCode::Type::Xmad, &ShaderIR::DecodeXmad},
    };
    // Decode into a scratch block first so the result can be wrapped in a conditional below.
    std::vector<Node> decoded;
    const auto handler = decoders.find(opcode->get().GetType());
    if (handler == decoders.end()) {
        pc = DecodeOther(decoded, pc);
    } else {
        pc = (this->*handler->second)(decoded, pc);
    }
    // Some instructions (like SSY) don't have a predicate field, they are always unconditionally
    // executed.
    const auto pred_index = static_cast<u32>(instr.pred.pred_index);
    const bool is_predicated = OpCode::IsPredicatedInstruction(opcode->get().GetId()) &&
                               pred_index != static_cast<u32>(Pred::UnusedIndex);
    if (!is_predicated) {
        for (const auto& node : decoded) {
            global_code.push_back(node);
            bb.push_back(node);
        }
        return pc + 1;
    }
    const Node conditional =
        Conditional(GetPredicate(pred_index, instr.negate_pred != 0), std::move(decoded));
    global_code.push_back(conditional);
    bb.push_back(conditional);
    return pc + 1;
 }
 /// Post-processing run after decoding: deduces the texture handler size when needed and then
 /// assigns a size to every indexed sampler.
 void ShaderIR::PostDecode() {
    // Deduce texture handler size if needed.
    // Bind with auto&& instead of copying: DeduceTextureHandlerSize mutates the profile, and with
    // a plain `auto` the result would land in a throwaway local copy whenever the accessor
    // returns a reference. auto&& also stays valid if the accessor returns by value.
    // NOTE(review): the accessor's return type is not visible here - confirm it yields a mutable
    // reference to the registry's profile.
    auto&& gpu_driver = registry.AccessGuestDriverProfile();
    DeduceTextureHandlerSize(gpu_driver, used_samplers);
    // Deduce Indexed Samplers
    if (!uses_indexed_samplers) {
        return;
    }
    for (auto& sampler : used_samplers) {
        if (!sampler.is_indexed) {
            continue;
        }
        if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
            sampler.size = *size;
        } else {
            // No following bound sampler to measure against: fall back to a single element.
            LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
            sampler.size = 1;
        }
    }
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@ -1,166 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::SubOp;
 /// Decodes one arithmetic instruction (MOV, FMUL, FADD, MUFU, FMNMX, FCMP or RRO) located at
 /// `pc` and appends the resulting register and flag writes to `bb`.
 /// @param bb Node block receiving the generated nodes.
 /// @param pc Index of the instruction inside program_code.
 /// @return `pc`, unchanged.
 u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    Node op_a = GetRegister(instr.gpr8);
    // The second operand is an immediate, a register or a constant buffer entry.
    Node op_b = [&] {
        if (instr.is_b_imm) {
            return GetImmediate19(instr);
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();
    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV_C:
    case OpCode::Id::MOV_R: {
        // MOV has neither 'abs' nor 'neg' bits.
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    case OpCode::Id::FMUL_C:
    case OpCode::Id::FMUL_R:
    case OpCode::Id::FMUL_IMM: {
        // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
        if (instr.fmul.tab5cb8_2 != 0) {
            LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
                      instr.fmul.tab5cb8_2.Value());
        }
        if (instr.fmul.tab5c68_0 != 1) {
            // The message names the field that is actually checked and printed.
            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
                      instr.fmul.tab5c68_0.Value());
        }
        op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
        static constexpr std::array FmulPostFactor = {
            1.000f, // None
            0.500f, // Divide 2
            0.250f, // Divide 4
            0.125f, // Divide 8
            8.000f, // Mul 8
            4.000f, // Mul 4
            2.000f, // Mul 2
        };
        // The table has no entry for every possible selector value; reject values outside of it
        // instead of indexing past the end of the array.
        const u32 postfactor = static_cast<u32>(instr.fmul.postfactor);
        if (postfactor >= FmulPostFactor.size()) {
            UNIMPLEMENTED_MSG("Unhandled FMUL postfactor={}", postfactor);
        } else if (postfactor != 0) {
            op_a = Operation(OperationCode::FMul, NO_PRECISE, op_a,
                             Immediate(FmulPostFactor[postfactor]));
        }
        // TODO(Rodrigo): Should precise be used when there's a postfactor?
        Node value = Operation(OperationCode::FMul, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FADD_C:
    case OpCode::Id::FADD_R:
    case OpCode::Id::FADD_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        Node value = Operation(OperationCode::FAdd, PRECISE, op_a, op_b);
        value = GetSaturatedFloat(value, instr.alu.saturate_d);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::MUFU: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        // Select the operation from the sub op; unknown sub ops yield an immediate zero.
        Node value = [&]() {
            switch (instr.sub_op) {
            case SubOp::Cos:
                return Operation(OperationCode::FCos, PRECISE, op_a);
            case SubOp::Sin:
                return Operation(OperationCode::FSin, PRECISE, op_a);
            case SubOp::Ex2:
                return Operation(OperationCode::FExp2, PRECISE, op_a);
            case SubOp::Lg2:
                return Operation(OperationCode::FLog2, PRECISE, op_a);
            case SubOp::Rcp:
                return Operation(OperationCode::FDiv, PRECISE, Immediate(1.0f), op_a);
            case SubOp::Rsq:
                return Operation(OperationCode::FInverseSqrt, PRECISE, op_a);
            case SubOp::Sqrt:
                return Operation(OperationCode::FSqrt, PRECISE, op_a);
            default:
                UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}", instr.sub_op.Value());
                return Immediate(0);
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FMNMX_C:
    case OpCode::Id::FMNMX_R:
    case OpCode::Id::FMNMX_IMM: {
        op_a = GetOperandAbsNegFloat(op_a, instr.alu.abs_a, instr.alu.negate_a);
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        // The predicate selects between the minimum (true) and the maximum (false).
        const Node condition = GetPredicate(instr.alu.fmnmx.pred, instr.alu.fmnmx.negate_pred != 0);
        const Node min = Operation(OperationCode::FMin, NO_PRECISE, op_a, op_b);
        const Node max = Operation(OperationCode::FMax, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::FCMP_RR:
    case OpCode::Id::FCMP_RC:
    case OpCode::Id::FCMP_IMMR: {
        UNIMPLEMENTED_IF(instr.fcmp.ftz == 0);
        // Compare the third operand against zero and select op_a or op_b with the result.
        Node op_c = GetRegister(instr.gpr39);
        Node comp = GetPredicateComparisonFloat(instr.fcmp.cond, std::move(op_c), Immediate(0.0f));
        SetRegister(
            bb, instr.gpr0,
            Operation(OperationCode::Select, std::move(comp), std::move(op_a), std::move(op_b)));
        break;
    }
    case OpCode::Id::RRO_C:
    case OpCode::Id::RRO_R:
    case OpCode::Id::RRO_IMM: {
        LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
        // Currently RRO is only implemented as a register move.
        op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
        SetRegister(bb, instr.gpr0, op_b);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@ -1,101 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 /// Decodes a half float HADD2/HMUL2 instruction (register or constant buffer form) located at
 /// `pc` and appends the write of the merged result to `bb`.
 /// @return `pc`, unchanged.
 u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opcode_id = opcode->get().GetId();
    // Reads a single bit of the raw instruction word.
    const auto bit = [&instr](u32 position) { return ((instr.value >> position) & 1) != 0; };
    const auto log_missing_ftz = [&] {
        if (instr.alu_half.ftz == 0) {
            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
        }
    };
    // Modifier bits live at different positions depending on the encoding.
    bool negate_a = false;
    bool negate_b = false;
    bool absolute_a = false;
    bool absolute_b = false;
    switch (opcode_id) {
    case OpCode::Id::HADD2_R:
        log_missing_ftz();
        negate_a = bit(43);
        negate_b = bit(31);
        absolute_a = bit(44);
        absolute_b = bit(30);
        break;
    case OpCode::Id::HADD2_C:
        log_missing_ftz();
        negate_a = bit(43);
        negate_b = bit(56);
        absolute_a = bit(44);
        absolute_b = bit(54);
        break;
    case OpCode::Id::HMUL2_R:
        negate_a = bit(43);
        absolute_a = bit(44);
        absolute_b = bit(30);
        break;
    case OpCode::Id::HMUL2_C:
        negate_b = bit(31);
        absolute_a = bit(44);
        absolute_b = bit(54);
        break;
    default:
        UNREACHABLE();
        break;
    }
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half.type_a);
    op_a = GetOperandAbsNegHalf(op_a, absolute_a, negate_a);
    // Constant buffer operands are unpacked as F32; register operands carry their own type.
    const std::pair<HalfType, Node> source_b = [&]() -> std::pair<HalfType, Node> {
        switch (opcode_id) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HMUL2_C:
            return {HalfType::F32, GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::HADD2_R:
        case OpCode::Id::HMUL2_R:
            return {instr.alu_half.type_b, GetRegister(instr.gpr20)};
        default:
            UNREACHABLE();
            return {HalfType::F32, Immediate(0)};
        }
    }();
    Node op_b = UnpackHalfFloat(source_b.second, source_b.first);
    op_b = GetOperandAbsNegHalf(op_b, absolute_b, negate_b);
    Node value = [&] {
        switch (opcode_id) {
        case OpCode::Id::HADD2_C:
        case OpCode::Id::HADD2_R:
            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
        case OpCode::Id::HMUL2_C:
        case OpCode::Id::HMUL2_R:
            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
        default:
            UNIMPLEMENTED_MSG("Unhandled half float instruction: {}", opcode->get().GetName());
            return Immediate(0);
        }
    }();
    // Saturate, merge with the current destination contents, then write back.
    value = GetSaturatedHalfFloat(value, instr.alu_half.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half.merge);
    SetRegister(bb, instr.gpr0, value);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@ -1,54 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 /// Decodes a half float HADD2/HMUL2 instruction with an immediate second operand located at
 /// `pc` and appends the write of the merged result to `bb`.
 /// @return `pc`, unchanged.
 u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opcode_id = opcode->get().GetId();
    // HADD2 signals FTZ through a dedicated bit, the other encoding through its precision field.
    const bool lacks_ftz =
        opcode_id == OpCode::Id::HADD2_IMM
            ? instr.alu_half_imm.ftz == 0
            : instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ;
    if (lacks_ftz) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }
    const Node unpacked_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.alu_half_imm.type_a);
    const Node op_a =
        GetOperandAbsNegHalf(unpacked_a, instr.alu_half_imm.abs_a, instr.alu_half_imm.negate_a);
    const Node op_b = UnpackHalfImmediate(instr, true);
    Node value = [&]() {
        switch (opcode_id) {
        case OpCode::Id::HADD2_IMM:
            return Operation(OperationCode::HAdd, PRECISE, op_a, op_b);
        case OpCode::Id::HMUL2_IMM:
            return Operation(OperationCode::HMul, PRECISE, op_a, op_b);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();
    // Saturate, merge with the current destination contents, then write back.
    value = GetSaturatedHalfFloat(value, instr.alu_half_imm.saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.alu_half_imm.merge);
    SetRegister(bb, instr.gpr0, value);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_immediate.cpp
@ -1,53 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 /// Decodes an arithmetic instruction with a 32-bit immediate operand (MOV32_IMM, FMUL32_IMM or
 /// FADD32I) located at `pc` and appends the resulting writes to `bb`.
 /// @return `pc`, unchanged.
 u32 ShaderIR::DecodeArithmeticImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    switch (opcode->get().GetId()) {
    case OpCode::Id::MOV32_IMM:
        // Plain move of the immediate into the destination register.
        SetRegister(bb, instr.gpr0, GetImmediate32(instr));
        break;
    case OpCode::Id::FMUL32_IMM: {
        const Node product =
            Operation(OperationCode::FMul, PRECISE, GetRegister(instr.gpr8), GetImmediate32(instr));
        const Node result = GetSaturatedFloat(product, instr.fmul32.saturate);
        SetInternalFlagsFromFloat(bb, result, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, result);
        break;
    }
    case OpCode::Id::FADD32I: {
        // Both operands carry their own 'abs' and 'neg' modifiers.
        const Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fadd32i.abs_a,
                                               instr.fadd32i.negate_a);
        const Node rhs = GetOperandAbsNegFloat(GetImmediate32(instr), instr.fadd32i.abs_b,
                                               instr.fadd32i.negate_b);
        const Node sum = Operation(OperationCode::FAdd, PRECISE, lhs, rhs);
        SetInternalFlagsFromFloat(bb, sum, instr.op_32.generates_cc);
        SetRegister(bb, instr.gpr0, sum);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled arithmetic immediate instruction: {}",
                          opcode->get().GetName());
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@ -1,375 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::IAdd3Height;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::Register;
 // Decodes one integer ALU instruction located at program_code[pc] and appends its IR to bb.
 //
 // Operand A is always read from gpr8. Operand B is selected by the instruction encoding: the
 // value of instr.alu.GetSignedImm20_20() when is_b_imm is set, gpr20 when is_b_gpr is set, and a
 // const buffer entry (cbuf34) otherwise. Individual cases may read additional operands such as
 // gpr39. Opcodes without a matching case are reported through UNIMPLEMENTED_MSG.
 // Returns pc unmodified.
 u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    Node op_a = GetRegister(instr.gpr8);
    // Pick the source of operand B from the encoding bits.
    Node op_b = [&]() {
        if (instr.is_b_imm) {
            return Immediate(instr.alu.GetSignedImm20_20());
        } else if (instr.is_b_gpr) {
            return GetRegister(instr.gpr20);
        } else {
            return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        }
    }();
    switch (opcode->get().GetId()) {
    // IADD: gpr0 = (+/-)a + (+/-)b, plus the carry flag when the X bit is set.
    case OpCode::Id::IADD_C:
    case OpCode::Id::IADD_R:
    case OpCode::Id::IADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.alu.saturate_d, "IADD.SAT");
        UNIMPLEMENTED_IF_MSG(instr.iadd.x && instr.generates_cc, "IADD.X Rd.CC");
        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
        Node value = Operation(OperationCode::UAdd, op_a, op_b);
        if (instr.iadd.x) {
            // Convert the boolean carry flag into 0/1 and add it to the sum.
            Node carry = GetInternalFlag(InternalFlag::Carry);
            Node x = Operation(OperationCode::Select, std::move(carry), Immediate(1), Immediate(0));
            value = Operation(OperationCode::UAdd, std::move(value), std::move(x));
        }
        if (instr.generates_cc) {
            // Zero and sign are derived from the result; carry from the two operands.
            // NOTE(review): overflow is only flagged when both operands are positive and the
            // result is negative; overflow of two negative operands is not detected here.
            const Node i0 = Immediate(0);
            Node zero = Operation(OperationCode::LogicalIEqual, value, i0);
            Node sign = Operation(OperationCode::LogicalILessThan, value, i0);
            Node carry = Operation(OperationCode::LogicalAddCarry, op_a, op_b);
            Node pos_a = Operation(OperationCode::LogicalIGreaterThan, op_a, i0);
            Node pos_b = Operation(OperationCode::LogicalIGreaterThan, op_b, i0);
            Node pos = Operation(OperationCode::LogicalAnd, std::move(pos_a), std::move(pos_b));
            Node overflow = Operation(OperationCode::LogicalAnd, pos, sign);
            SetInternalFlag(bb, InternalFlag::Zero, std::move(zero));
            SetInternalFlag(bb, InternalFlag::Sign, std::move(sign));
            SetInternalFlag(bb, InternalFlag::Carry, std::move(carry));
            SetInternalFlag(bb, InternalFlag::Overflow, std::move(overflow));
        }
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    // IADD3: gpr0 = a + b + c, where c comes from gpr39. The register form additionally supports
    // half-word selection per operand and a 16-bit shift of the partial sum a + b.
    case OpCode::Id::IADD3_C:
    case OpCode::Id::IADD3_R:
    case OpCode::Id::IADD3_IMM: {
        Node op_c = GetRegister(instr.gpr39);
        // Extracts the lower or upper 16 bits of an operand, or passes it through unchanged.
        const auto ApplyHeight = [&](IAdd3Height height, Node value) {
            switch (height) {
            case IAdd3Height::None:
                return value;
            case IAdd3Height::LowerHalfWord:
                return BitfieldExtract(value, 0, 16);
            case IAdd3Height::UpperHalfWord:
                return BitfieldExtract(value, 16, 16);
            default:
                UNIMPLEMENTED_MSG("Unhandled IADD3 height: {}", height);
                return Immediate(0);
            }
        };
        // Height selection is only applied for the register variant.
        if (opcode->get().GetId() == OpCode::Id::IADD3_R) {
            op_a = ApplyHeight(instr.iadd3.height_a, op_a);
            op_b = ApplyHeight(instr.iadd3.height_b, op_b);
            op_c = ApplyHeight(instr.iadd3.height_c, op_c);
        }
        op_a = GetOperandAbsNegInteger(op_a, false, instr.iadd3.neg_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
        op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
        const Node value = [&] {
            Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
            // Non-register variants have no shift mode: plain three-way addition.
            if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
            }
            const Node shifted = [&] {
                switch (instr.iadd3.mode) {
                case Tegra::Shader::IAdd3Mode::RightShift:
                    // TODO(tech4me): According to
                    // https://envytools.readthedocs.io/en/latest/hw/graph/maxwell/cuda/int.html?highlight=iadd3
                    // The addition between op_a and op_b should be done in uint33, more
                    // investigation required
                    return Operation(OperationCode::ILogicalShiftRight, NO_PRECISE, add_ab,
                                     Immediate(16));
                case Tegra::Shader::IAdd3Mode::LeftShift:
                    return Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, add_ab,
                                     Immediate(16));
                default:
                    return add_ab;
                }
            }();
            return Operation(OperationCode::IAdd, NO_PRECISE, shifted, op_c);
        }();
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    // ISCADD: gpr0 = (a << shift_amount) + b.
    case OpCode::Id::ISCADD_C:
    case OpCode::Id::ISCADD_R:
    case OpCode::Id::ISCADD_IMM: {
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in ISCADD is not implemented");
        op_a = GetOperandAbsNegInteger(op_a, false, instr.alu_integer.negate_a, true);
        op_b = GetOperandAbsNegInteger(op_b, false, instr.alu_integer.negate_b, true);
        const Node shift = Immediate(static_cast<u32>(instr.alu_integer.shift_amount));
        const Node shifted_a = Operation(OperationCode::ILogicalShiftLeft, NO_PRECISE, op_a, shift);
        const Node value = Operation(OperationCode::IAdd, NO_PRECISE, shifted_a, op_b);
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    // POPC: gpr0 = bit count of operand B (optionally inverted first). Operand A is not used.
    case OpCode::Id::POPC_C:
    case OpCode::Id::POPC_R:
    case OpCode::Id::POPC_IMM: {
        if (instr.popc.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        }
        const Node value = Operation(OperationCode::IBitCount, PRECISE, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    // FLO: gpr0 = most-significant-bit query (IBitMSB/UBitMSB) on operand B, optionally inverted
    // beforehand; with the sh bit set the result is XORed with 31.
    case OpCode::Id::FLO_R:
    case OpCode::Id::FLO_C:
    case OpCode::Id::FLO_IMM: {
        Node value;
        if (instr.flo.invert) {
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(op_b));
        }
        if (instr.flo.is_signed) {
            value = Operation(OperationCode::IBitMSB, NO_PRECISE, std::move(op_b));
        } else {
            value = Operation(OperationCode::UBitMSB, NO_PRECISE, std::move(op_b));
        }
        if (instr.flo.sh) {
            value =
                Operation(OperationCode::UBitwiseXor, NO_PRECISE, std::move(value), Immediate(31));
        }
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    // SEL: gpr0 = predicate ? a : b.
    case OpCode::Id::SEL_C:
    case OpCode::Id::SEL_R:
    case OpCode::Id::SEL_IMM: {
        const Node condition = GetPredicate(instr.sel.pred, instr.sel.neg_pred != 0);
        const Node value = Operation(OperationCode::Select, PRECISE, condition, op_a, op_b);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    // ICMP: compares a test value against zero and selects between gpr8 and a second operand.
    // Which source supplies the second operand and which supplies the test value depends on the
    // variant; the shared op_a/op_b decoded above are not used here.
    case OpCode::Id::ICMP_CR:
    case OpCode::Id::ICMP_R:
    case OpCode::Id::ICMP_RC:
    case OpCode::Id::ICMP_IMM: {
        const Node zero = Immediate(0);
        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::ICMP_CR:
                return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                        GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_R:
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39)};
            case OpCode::Id::ICMP_RC:
                return {GetRegister(instr.gpr39),
                        GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
            case OpCode::Id::ICMP_IMM:
                return {Immediate(instr.alu.GetSignedImm20_20()), GetRegister(instr.gpr39)};
            default:
                UNREACHABLE();
                return {zero, zero};
            }
        }();
        const Node op_lhs = GetRegister(instr.gpr8);
        const Node comparison =
            GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
        break;
    }
    // LOP: two-input logic operation with optional inversion of either input; the actual
    // operation and the predicate write are delegated to WriteLogicOperation.
    case OpCode::Id::LOP_C:
    case OpCode::Id::LOP_R:
    case OpCode::Id::LOP_IMM: {
        if (instr.alu.lop.invert_a)
            op_a = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_a);
        if (instr.alu.lop.invert_b)
            op_b = Operation(OperationCode::IBitwiseNot, NO_PRECISE, op_b);
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop.operation, op_a, op_b,
                            instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
                            instr.generates_cc);
        break;
    }
    // LOP3: three-input logic operation driven by a lookup table. The table immediate is read
    // via GetImmLut28() for the register variant and via GetImmLut48() for the other variants.
    case OpCode::Id::LOP3_C:
    case OpCode::Id::LOP3_R:
    case OpCode::Id::LOP3_IMM: {
        const Node op_c = GetRegister(instr.gpr39);
        const Node lut = [&]() {
            if (opcode->get().GetId() == OpCode::Id::LOP3_R) {
                return Immediate(instr.alu.lop3.GetImmLut28());
            } else {
                return Immediate(instr.alu.lop3.GetImmLut48());
            }
        }();
        WriteLop3Instruction(bb, instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
        break;
    }
    // IMNMX: gpr0 = predicate ? min(a, b) : max(a, b), signed or unsigned per is_signed.
    case OpCode::Id::IMNMX_C:
    case OpCode::Id::IMNMX_R:
    case OpCode::Id::IMNMX_IMM: {
        UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
        const bool is_signed = instr.imnmx.is_signed;
        const Node condition = GetPredicate(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
        const Node min = SignedOperation(OperationCode::IMin, is_signed, NO_PRECISE, op_a, op_b);
        const Node max = SignedOperation(OperationCode::IMax, is_signed, NO_PRECISE, op_a, op_b);
        const Node value = Operation(OperationCode::Select, NO_PRECISE, condition, min, max);
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    // LEA: gpr0 = op_b_ + (op_a_ << op_c_). The three operands are gathered per variant below;
    // LEA_HI is not implemented and shares the fallback path with unknown variants.
    case OpCode::Id::LEA_R2:
    case OpCode::Id::LEA_R1:
    case OpCode::Id::LEA_IMM:
    case OpCode::Id::LEA_RZ:
    case OpCode::Id::LEA_HI: {
        auto [op_a_, op_b_, op_c_] = [&]() -> std::tuple<Node, Node, Node> {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LEA_R2: {
                return {GetRegister(instr.gpr20), GetRegister(instr.gpr39),
                        Immediate(static_cast<u32>(instr.lea.r2.entry_a))};
            }
            case OpCode::Id::LEA_R1: {
                const bool neg = instr.lea.r1.neg != 0;
                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        GetRegister(instr.gpr20),
                        Immediate(static_cast<u32>(instr.lea.r1.entry_a))};
            }
            case OpCode::Id::LEA_IMM: {
                const bool neg = instr.lea.imm.neg != 0;
                return {GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_a)),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }
            case OpCode::Id::LEA_RZ: {
                const bool neg = instr.lea.rz.neg != 0;
                return {GetConstBuffer(instr.lea.rz.cb_index, instr.lea.rz.cb_offset),
                        GetOperandAbsNegInteger(GetRegister(instr.gpr8), false, neg, true),
                        Immediate(static_cast<u32>(instr.lea.rz.entry_a))};
            }
            case OpCode::Id::LEA_HI:
            default:
                UNIMPLEMENTED_MSG("Unhandled LEA subinstruction: {}", opcode->get().GetName());
                return {Immediate(static_cast<u32>(instr.lea.imm.entry_a)), GetRegister(instr.gpr8),
                        Immediate(static_cast<u32>(instr.lea.imm.entry_b))};
            }
        }();
        // Only the "unused" predicate index is supported; a predicate write is not emitted.
        UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                             "Unhandled LEA Predicate");
        Node value =
            Operation(OperationCode::ILogicalShiftLeft, std::move(op_a_), std::move(op_c_));
        value = Operation(OperationCode::IAdd, std::move(op_b_), std::move(value));
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled ArithmeticInteger instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 // Emits the IR for a three-input bitwise logic instruction driven by a truth table.
 //
 // imm_lut must hold an ImmediateNode (std::get fails otherwise). Bit N of its value selects
 // whether the minterm for row N contributes to the result, where row N uses op_a when bit 2 of N
 // is set, op_b when bit 1 is set and op_c when bit 0 is set, and the bitwise complement of the
 // operand otherwise. The result is the bitwise OR of all selected minterms; it is handed to
 // SetInternalFlagsFromInteger together with sets_cc and written to dest.
 void ShaderIR::WriteLop3Instruction(NodeBlock& bb, Register dest, Node op_a, Node op_b, Node op_c,
                                    Node imm_lut, bool sets_cc) {
    const auto truth_table = std::get<ImmediateNode>(*imm_lut).GetValue();

    Node result = Immediate(0);
    for (u32 row = 0; row < 8; ++row) {
        if ((truth_table & (1U << row)) == 0) {
            // This input combination does not contribute to the output.
            continue;
        }

        // An input whose bit is clear in the row index takes part inverted.
        Node term_a = op_a;
        if ((row & 4) == 0) {
            term_a = Operation(OperationCode::IBitwiseNot, op_a);
        }
        Node term_b = op_b;
        if ((row & 2) == 0) {
            term_b = Operation(OperationCode::IBitwiseNot, op_b);
        }
        Node term_c = op_c;
        if ((row & 1) == 0) {
            term_c = Operation(OperationCode::IBitwiseNot, op_c);
        }

        Node minterm = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, term_a, term_b);
        minterm = Operation(OperationCode::IBitwiseAnd, NO_PRECISE, minterm, term_c);
        result = Operation(OperationCode::IBitwiseOr, result, minterm);
    }

    SetInternalFlagsFromInteger(bb, result, sets_cc);
    SetRegister(bb, dest, result);
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer_immediate.cpp
@ -1,99 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::LogicOperation;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::PredicateResultMode;
 using Tegra::Shader::Register;
 // Decodes one integer ALU instruction whose second operand is a 32-bit immediate.
 //
 // The instruction is read from program_code[pc]; the register operand comes from gpr8 and the
 // immediate from instr.alu.imm20_32. IADD32I and LOP32I are handled, anything else is reported
 // through UNIMPLEMENTED_MSG. The generated IR is appended to bb and pc is returned unmodified.
 u32 ShaderIR::DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node reg_operand = GetRegister(instr.gpr8);
    Node imm_operand = Immediate(static_cast<s32>(instr.alu.imm20_32));

    const auto id = opcode->get().GetId();
    if (id == OpCode::Id::IADD32I) {
        UNIMPLEMENTED_IF_MSG(instr.iadd32i.saturate, "IADD32I saturation is not implemented");

        const bool negate_reg = instr.iadd32i.negate_a != 0;
        reg_operand = GetOperandAbsNegInteger(std::move(reg_operand), false, negate_reg, true);

        Node sum = Operation(OperationCode::IAdd, PRECISE, std::move(reg_operand),
                             std::move(imm_operand));
        SetInternalFlagsFromInteger(bb, sum, instr.op_32.generates_cc != 0);
        SetRegister(bb, instr.gpr0, std::move(sum));
    } else if (id == OpCode::Id::LOP32I) {
        // Either input may be complemented before the logic operation is applied.
        if (instr.alu.lop32i.invert_a) {
            reg_operand =
                Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(reg_operand));
        }
        if (instr.alu.lop32i.invert_b) {
            imm_operand =
                Operation(OperationCode::IBitwiseNot, NO_PRECISE, std::move(imm_operand));
        }
        // This form never writes a predicate, hence None / UnusedIndex.
        WriteLogicOperation(bb, instr.gpr0, instr.alu.lop32i.operation, std::move(reg_operand),
                            std::move(imm_operand), PredicateResultMode::None, Pred::UnusedIndex,
                            instr.op_32.generates_cc != 0);
    } else {
        UNIMPLEMENTED_MSG("Unhandled ArithmeticIntegerImmediate instruction: {}",
                          opcode->get().GetName());
    }

    return pc;
 }
 // Emits a two-input logic operation and, optionally, a predicate derived from its result.
 //
 // logic_op selects And / Or / Xor of op_a and op_b, or PassB which forwards op_b untouched; any
 // other value is reported as unimplemented and yields an immediate zero. The result is passed to
 // SetInternalFlagsFromInteger together with sets_cc and written to dest. When predicate_mode is
 // NotZero, predicate is additionally set to (result != 0); None writes no predicate and every
 // other mode is reported as unimplemented.
 void ShaderIR::WriteLogicOperation(NodeBlock& bb, Register dest, LogicOperation logic_op, Node op_a,
                                   Node op_b, PredicateResultMode predicate_mode, Pred predicate,
                                   bool sets_cc) {
    Node result;
    switch (logic_op) {
    case LogicOperation::And:
        result = Operation(OperationCode::IBitwiseAnd, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::Or:
        result = Operation(OperationCode::IBitwiseOr, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::Xor:
        result = Operation(OperationCode::IBitwiseXor, PRECISE, std::move(op_a), std::move(op_b));
        break;
    case LogicOperation::PassB:
        result = op_b;
        break;
    default:
        UNIMPLEMENTED_MSG("Unimplemented logic operation={}", logic_op);
        result = Immediate(0);
        break;
    }

    SetInternalFlagsFromInteger(bb, result, sets_cc);
    SetRegister(bb, dest, result);

    // Write the predicate value depending on the predicate mode.
    if (predicate_mode == PredicateResultMode::None) {
        // Nothing else to emit.
        return;
    }
    if (predicate_mode != PredicateResultMode::NotZero) {
        UNIMPLEMENTED_MSG("Unimplemented predicate result mode: {}", predicate_mode);
        return;
    }

    // NotZero: the predicate becomes true when the result is different from zero.
    Node is_not_zero = Operation(OperationCode::LogicalINotEqual, std::move(result), Immediate(0));
    SetPredicate(bb, static_cast<u64>(predicate), std::move(is_not_zero));
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfe.cpp
+++ b/src/video_core/shader/decode/bfe.cpp
@ -1,77 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes a bit field extract (BFE) instruction at program_code[pc] and appends its IR to bb.
 //
 // The source value is read from gpr8 and, when the brev bit is set, bit-reversed first. The second
 // operand (gpr20, a const buffer entry or an immediate, depending on the variant) packs the
 // extraction range: bits 0-7 are used as the offset and bits 8-15 as the bit count. The extracted
 // field is written to gpr0. Returns pc unmodified.
 u32 ShaderIR::DecodeBfe(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node source = GetRegister(instr.gpr8);

    Node packed_range;
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFE_R:
        packed_range = GetRegister(instr.gpr20);
        break;
    case OpCode::Id::BFE_C:
        packed_range = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::BFE_IMM:
        packed_range = Immediate(instr.alu.GetSignedImm20_20());
        break;
    default:
        UNREACHABLE();
        packed_range = Immediate(0);
        break;
    }

    UNIMPLEMENTED_IF_MSG(instr.bfe.rd_cc, "Condition codes in BFE is not implemented");

    const bool is_signed = instr.bfe.is_signed;

    if (instr.bfe.brev) {
        // Bit reversal using the parallel swap method described in
        // https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
        // Each stage exchanges neighbouring groups of `shift` bits. A zero mask marks the last
        // stage (swap of the two half-words), which is emitted without masking.
        struct SwapStage {
            u32 shift;
            u32 mask;
        };
        constexpr SwapStage stages[] = {
            {1, 0x55555555U}, {2, 0x33333333U}, {4, 0x0F0F0F0FU}, {8, 0x00FF00FFU}, {16, 0},
        };

        for (const SwapStage& stage : stages) {
            Node upper = SignedOperation(OperationCode::ILogicalShiftRight, is_signed, source,
                                         Immediate(stage.shift));
            Node lower = source;
            if (stage.mask != 0) {
                upper = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(upper),
                                        Immediate(stage.mask));
                lower = SignedOperation(OperationCode::IBitwiseAnd, is_signed, std::move(lower),
                                        Immediate(stage.mask));
            }
            lower = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, std::move(lower),
                                    Immediate(stage.shift));
            source = SignedOperation(OperationCode::IBitwiseOr, is_signed, std::move(upper),
                                     std::move(lower));
        }
    }

    // Unpack the extraction range from the second operand.
    const auto field_offset = SignedOperation(OperationCode::IBitfieldExtract, is_signed,
                                              packed_range, Immediate(0), Immediate(8));
    const auto field_bits = SignedOperation(OperationCode::IBitfieldExtract, is_signed,
                                            packed_range, Immediate(8), Immediate(8));

    auto extracted = SignedOperation(OperationCode::IBitfieldExtract, is_signed, source,
                                     field_offset, field_bits);
    SetRegister(bb, instr.gpr0, std::move(extracted));

    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/bfi.cpp
+++ b/src/video_core/shader/decode/bfi.cpp
@ -1,45 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes a bit field insert (BFI) instruction at program_code[pc] and appends its IR to bb.
 //
 // The value to insert is read from gpr8. The variant decides where the base value and the packed
 // range operand come from; bits 0-7 of the packed operand are used as the insertion offset and
 // bits 8-15 as the bit count. The result of UBitfieldInsert is written to gpr0 and forwarded to
 // SetInternalFlagsFromInteger. Returns pc unmodified.
 u32 ShaderIR::DecodeBfi(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    Node packed_shift;
    Node base;
    switch (opcode->get().GetId()) {
    case OpCode::Id::BFI_RC:
        packed_shift = GetRegister(instr.gpr39);
        base = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::BFI_IMM_R:
        packed_shift = Immediate(instr.alu.GetSignedImm20_20());
        base = GetRegister(instr.gpr39);
        break;
    default:
        UNREACHABLE();
        packed_shift = Immediate(0);
        base = Immediate(0);
        break;
    }

    const Node insert = GetRegister(instr.gpr8);
    const Node offset = BitfieldExtract(packed_shift, 0, 8);
    const Node bits = BitfieldExtract(packed_shift, 8, 8);

    const Node inserted =
        Operation(OperationCode::UBitfieldInsert, PRECISE, base, insert, offset, bits);

    SetInternalFlagsFromInteger(bb, inserted, instr.generates_cc);
    SetRegister(bb, instr.gpr0, inserted);

    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/conversion.cpp
+++ b/src/video_core/shader/decode/conversion.cpp
@ -1,321 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <limits>
 #include <optional>
 #include <utility>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 namespace {
 // Maps a half-float selector to its cast operation: FCastHalf0 for selector 0, FCastHalf1 for
 // every other value.
 constexpr OperationCode GetFloatSelector(u64 selector) {
    if (selector != 0) {
        return OperationCode::FCastHalf1;
    }
    return OperationCode::FCastHalf0;
 }
 // Returns the width in bits of a register size: 8 for Byte, 16 for Short, 32 for Word and 64 for
 // Long. Any other value yields 0.
 constexpr u32 SizeInBits(Register::Size size) {
    if (size == Register::Size::Byte) {
        return 8;
    }
    if (size == Register::Size::Short) {
        return 16;
    }
    if (size == Register::Size::Word) {
        return 32;
    }
    if (size == Register::Size::Long) {
        return 64;
    }
    return 0;
 }
 // Computes the inclusive [min, max] range an integer conversion result has to be clamped to when
 // saturation is requested.
 //
 // @param src_size   Size of the source operand.
 // @param dst_size   Size of the destination operand.
 // @param src_signed Whether the source is interpreted as signed.
 // @param dst_signed Whether the destination is interpreted as signed.
 // @return std::nullopt when no clamping is needed (Word to Word with matching signedness),
 //         otherwise the bounds as a pair of s32 values. For an unsigned destination of 32 bits or
 //         more the upper bound is the bit pattern 0xFFFFFFFF stored in an s32.
 constexpr std::optional<std::pair<s32, s32>> IntegerSaturateBounds(Register::Size src_size,
                                                                   Register::Size dst_size,
                                                                   bool src_signed,
                                                                   bool dst_signed) {
    if (src_size == Register::Size::Word && dst_size == Register::Size::Word) {
        if (src_signed == dst_signed) {
            return std::nullopt;
        }
        // Word to Word with differing signedness: only the non-negative signed range is shared
        return std::make_pair(0, std::numeric_limits<s32>::max());
    }

    const u32 dst_bits = SizeInBits(dst_size);
    // The bounds are stored as s32, so a destination of 32 bits or more covers the whole
    // representable range. Handling it without shifting avoids shifting into or past the sign bit
    // of an int, which is signed overflow / undefined behavior.
    const bool is_full_width = dst_bits >= 32;

    if (dst_signed) {
        if (is_full_width) {
            return std::make_pair(std::numeric_limits<s32>::min(),
                                  std::numeric_limits<s32>::max());
        }
        // Signed destination, clamp to [-128, 127] for instance
        return std::make_pair(-(1 << (dst_bits - 1)), (1 << (dst_bits - 1)) - 1);
    }

    // Unsigned destination
    if (is_full_width) {
        return std::make_pair(0, static_cast<s32>(std::numeric_limits<u32>::max()));
    }
    return std::make_pair(0, (1 << dst_bits) - 1);
 }
 } // Anonymous namespace
 u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    switch (opcode->get().GetId()) {
    case OpCode::Id::I2I_R:
    case OpCode::Id::I2I_C:
    case OpCode::Id::I2I_IMM: {
        const bool src_signed = instr.conversion.is_input_signed;
        const bool dst_signed = instr.conversion.is_output_signed;
        const Register::Size src_size = instr.conversion.src_size;
        const Register::Size dst_size = instr.conversion.dst_size;
        const u32 selector = static_cast<u32>(instr.conversion.int_src.selector);
        Node value = [this, instr, opcode] {
            switch (opcode->get().GetId()) {
            case OpCode::Id::I2I_R:
                return GetRegister(instr.gpr20);
            case OpCode::Id::I2I_C:
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            case OpCode::Id::I2I_IMM:
                return Immediate(instr.alu.GetSignedImm20_20());
            default:
                UNREACHABLE();
                return Immediate(0);
            }
        }();
        // Ensure the source selector is valid
        switch (instr.conversion.src_size) {
        case Register::Size::Byte:
            break;
        case Register::Size::Short:
            ASSERT(selector == 0 || selector == 2);
            break;
        default:
            ASSERT(selector == 0);
            break;
        }
        if (src_size != Register::Size::Word || selector != 0) {
            value = SignedOperation(OperationCode::IBitfieldExtract, src_signed, std::move(value),
                                    Immediate(selector * 8), Immediate(SizeInBits(src_size)));
        }
        value = GetOperandAbsNegInteger(std::move(value), instr.conversion.abs_a,
                                        instr.conversion.negate_a, src_signed);
        if (instr.alu.saturate_d) {
            if (src_signed && !dst_signed) {
                Node is_negative = Operation(OperationCode::LogicalUGreaterEqual, value,
                                             Immediate(1 << (SizeInBits(src_size) - 1)));
                value = Operation(OperationCode::Select, std::move(is_negative), Immediate(0),
                                  std::move(value));
                // Simplify generated expressions, this can be removed without semantic impact
                SetTemporary(bb, 0, std::move(value));
                value = GetTemporary(0);
                if (dst_size != Register::Size::Word) {
                    const Node limit = Immediate((1 << SizeInBits(dst_size)) - 1);
                    Node is_large =
                        Operation(OperationCode::LogicalUGreaterThan, std::move(value), limit);
                    value = Operation(OperationCode::Select, std::move(is_large), limit,
                                      std::move(value));
                }
            } else if (const std::optional bounds =
                           IntegerSaturateBounds(src_size, dst_size, src_signed, dst_signed)) {
                value = SignedOperation(OperationCode::IMax, src_signed, std::move(value),
                                        Immediate(bounds->first));
                value = SignedOperation(OperationCode::IMin, src_signed, std::move(value),
                                        Immediate(bounds->second));
            }
        } else if (dst_size != Register::Size::Word) {
            // No saturation, we only have to mask the result
            Node mask = Immediate((1 << SizeInBits(dst_size)) - 1);
            value = Operation(OperationCode::UBitwiseAnd, std::move(value), std::move(mask));
        }
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, std::move(value));
        break;
    }
    case OpCode::Id::I2F_R:
    case OpCode::Id::I2F_C:
    case OpCode::Id::I2F_IMM: {
        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in I2F is not implemented");
        Node value = [&] {
            switch (opcode->get().GetId()) {
            case OpCode::Id::I2F_R:
                return GetRegister(instr.gpr20);
            case OpCode::Id::I2F_C:
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            case OpCode::Id::I2F_IMM:
                return Immediate(instr.alu.GetSignedImm20_20());
            default:
                UNREACHABLE();
                return Immediate(0);
            }
        }();
        const bool input_signed = instr.conversion.is_input_signed;
        if (const u32 offset = static_cast<u32>(instr.conversion.int_src.selector); offset > 0) {
            ASSERT(instr.conversion.src_size == Register::Size::Byte ||
                   instr.conversion.src_size == Register::Size::Short);
            if (instr.conversion.src_size == Register::Size::Short) {
                ASSERT(offset == 0 || offset == 2);
            }
            value = SignedOperation(OperationCode::ILogicalShiftRight, input_signed,
                                    std::move(value), Immediate(offset * 8));
        }
        value = ConvertIntegerSize(value, instr.conversion.src_size, input_signed);
        value = GetOperandAbsNegInteger(value, instr.conversion.abs_a, false, input_signed);
        value = SignedOperation(OperationCode::FCastInteger, input_signed, PRECISE, value);
        value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        if (instr.conversion.dst_size == Register::Size::Short) {
            value = Operation(OperationCode::HCastFloat, PRECISE, value);
        }
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2F_R:
    case OpCode::Id::F2F_C:
    case OpCode::Id::F2F_IMM: {
        UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long);
        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2F is not implemented");
        Node value = [&]() {
            switch (opcode->get().GetId()) {
            case OpCode::Id::F2F_R:
                return GetRegister(instr.gpr20);
            case OpCode::Id::F2F_C:
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            case OpCode::Id::F2F_IMM:
                return GetImmediate19(instr);
            default:
                UNREACHABLE();
                return Immediate(0);
            }
        }();
        if (instr.conversion.src_size == Register::Size::Short) {
            value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
                              std::move(value));
        } else {
            ASSERT(instr.conversion.float_src.selector == 0);
        }
        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
        value = [&] {
            if (instr.conversion.src_size != instr.conversion.dst_size) {
                // Rounding operations only matter when the source and destination conversion size
                // is the same.
                return value;
            }
            switch (instr.conversion.f2f.GetRoundingMode()) {
            case Tegra::Shader::F2fRoundingOp::None:
                return value;
            case Tegra::Shader::F2fRoundingOp::Round:
                return Operation(OperationCode::FRoundEven, value);
            case Tegra::Shader::F2fRoundingOp::Floor:
                return Operation(OperationCode::FFloor, value);
            case Tegra::Shader::F2fRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, value);
            case Tegra::Shader::F2fRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, value);
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}",
                                  instr.conversion.f2f.rounding.Value());
                return value;
            }
        }();
        value = GetSaturatedFloat(value, instr.alu.saturate_d);
        SetInternalFlagsFromFloat(bb, value, instr.generates_cc);
        if (instr.conversion.dst_size == Register::Size::Short) {
            value = Operation(OperationCode::HCastFloat, PRECISE, value);
        }
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::F2I_R:
    case OpCode::Id::F2I_C:
    case OpCode::Id::F2I_IMM: {
        UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long);
        UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                             "Condition codes generation in F2I is not implemented");
        Node value = [&]() {
            switch (opcode->get().GetId()) {
            case OpCode::Id::F2I_R:
                return GetRegister(instr.gpr20);
            case OpCode::Id::F2I_C:
                return GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
            case OpCode::Id::F2I_IMM:
                return GetImmediate19(instr);
            default:
                UNREACHABLE();
                return Immediate(0);
            }
        }();
        if (instr.conversion.src_size == Register::Size::Short) {
            value = Operation(GetFloatSelector(instr.conversion.float_src.selector), NO_PRECISE,
                              std::move(value));
        } else {
            ASSERT(instr.conversion.float_src.selector == 0);
        }
        value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a);
        value = [&]() {
            switch (instr.conversion.f2i.rounding) {
            case Tegra::Shader::F2iRoundingOp::RoundEven:
                return Operation(OperationCode::FRoundEven, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Floor:
                return Operation(OperationCode::FFloor, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Ceil:
                return Operation(OperationCode::FCeil, PRECISE, value);
            case Tegra::Shader::F2iRoundingOp::Trunc:
                return Operation(OperationCode::FTrunc, PRECISE, value);
            default:
                UNIMPLEMENTED_MSG("Unimplemented F2I rounding mode {}",
                                  instr.conversion.f2i.rounding.Value());
                return Immediate(0);
            }
        }();
        const bool is_signed = instr.conversion.is_output_signed;
        value = SignedOperation(OperationCode::ICastFloat, is_signed, PRECISE, value);
        value = ConvertIntegerSize(value, instr.conversion.dst_size, is_signed);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled conversion instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@ -1,62 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes the FFMA instruction stored at program_code[pc].
 // Operand A is always gpr8; operands B and C are selected by the opcode variant. The FFma
 // result goes through GetSaturatedFloat and SetInternalFlagsFromFloat before being written to
 // gpr0. Returns pc unchanged.
 u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
    // Unexpected tab5980 values are only logged at debug level; decoding carries on regardless.
    if (instr.ffma.tab5980_0 != 1) {
        LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
    }
    if (instr.ffma.tab5980_1 != 0) {
        LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
    }
    const Node op_a = GetRegister(instr.gpr8);
    // Pick the remaining two operands; B is always fetched before C.
    Node operand_b;
    Node operand_c;
    switch (opcode->get().GetId()) {
    case OpCode::Id::FFMA_CR:
        operand_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        operand_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RR:
        operand_b = GetRegister(instr.gpr20);
        operand_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::FFMA_RC:
        operand_b = GetRegister(instr.gpr39);
        operand_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::FFMA_IMM:
        operand_b = GetImmediate19(instr);
        operand_c = GetRegister(instr.gpr39);
        break;
    default:
        UNIMPLEMENTED_MSG("Unhandled FFMA instruction: {}", opcode->get().GetName());
        operand_b = Immediate(0);
        operand_c = Immediate(0);
        break;
    }
    // Only negation is applied to B and C; the abs flag is hard-wired to false.
    operand_b = GetOperandAbsNegFloat(operand_b, false, instr.ffma.negate_b);
    operand_c = GetOperandAbsNegFloat(operand_c, false, instr.ffma.negate_c);
    Node result = Operation(OperationCode::FFma, PRECISE, op_a, operand_b, operand_c);
    result = GetSaturatedFloat(result, instr.alu.saturate_d);
    SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    SetRegister(bb, instr.gpr0, result);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set.cpp
+++ b/src/video_core/shader/decode/float_set.cpp
@ -1,58 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes the fset instruction stored at program_code[pc].
 // gpr8 is compared against operand B, the comparison is combined with predicate pred39, and
 // gpr0 receives a "true" constant or zero depending on the outcome. Returns pc unchanged.
 u32 ShaderIR::DecodeFloatSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const Node op_a = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fset.abs_a != 0,
                                            instr.fset.neg_a != 0);
    // Operand B source: immediate first, then register, otherwise const buffer.
    Node op_b;
    if (instr.is_b_imm) {
        op_b = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        op_b = GetRegister(instr.gpr20);
    } else {
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    op_b = GetOperandAbsNegFloat(op_b, instr.fset.abs_b != 0, instr.fset.neg_b != 0);
    const Node extra_pred = GetPredicate(instr.fset.pred39, instr.fset.neg_pred != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.fset.op);
    const Node comparison = GetPredicateComparisonFloat(instr.fset.cond, op_a, op_b);
    const Node condition = Operation(combiner, comparison, extra_pred);
    // With the bf bit set the result is the float pair 1.0f / 0.0f, otherwise the integer
    // pair -1 / 0.
    Node on_true;
    Node on_false;
    if (instr.fset.bf) {
        on_true = Immediate(1.0f);
        on_false = Immediate(0.0f);
    } else {
        on_true = Immediate(-1);
        on_false = Immediate(0);
    }
    const Node result = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);
    // The flags helper must match the kind of value that was produced.
    if (instr.fset.bf) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/float_set_predicate.cpp
+++ b/src/video_core/shader/decode/float_set_predicate.cpp
@ -1,57 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 // Decodes the float set-predicate instruction stored at program_code[pc].
 // Writes (A cond B) OP pred39 to predicate pred3 and, when pred0 is not the unused index,
 // (!(A cond B)) OP pred39 to predicate pred0. Returns pc unchanged.
 u32 ShaderIR::DecodeFloatSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    Node lhs = GetOperandAbsNegFloat(GetRegister(instr.gpr8), instr.fsetp.abs_a != 0,
                                     instr.fsetp.neg_a != 0);
    // Operand B source: immediate first, then register, otherwise const buffer.
    Node rhs;
    if (instr.is_b_imm) {
        rhs = GetImmediate19(instr);
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    rhs = GetOperandAbsNegFloat(std::move(rhs), instr.fsetp.abs_b, instr.fsetp.neg_b);
    // The constant predicate cannot be used as the primary destination.
    ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
    const Node comparison =
        GetPredicateComparisonFloat(instr.fsetp.cond, std::move(lhs), std::move(rhs));
    const Node extra_pred = GetPredicate(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.fsetp.op);
    // Primary destination: comparison OP extra predicate.
    const Node primary = Operation(combiner, comparison, extra_pred);
    SetPredicate(bb, instr.fsetp.pred3, primary);
    const bool has_secondary = instr.fsetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (!has_secondary) {
        return pc;
    }
    // Secondary destination: negated comparison OP extra predicate.
    const Node negated = Operation(OperationCode::LogicalNegate, comparison);
    const Node secondary = Operation(combiner, negated, extra_pred);
    SetPredicate(bb, instr.fsetp.pred0, secondary);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@ -1,115 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <array>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::PredCondition;
 // Decodes the HSET2 instruction stored at program_code[pc].
 // Both halves of the packed operands are compared; each comparison is combined with predicate
 // pred39 and selects a per-half constant. The two halves are OR-ed together and written to
 // gpr0. Returns pc unchanged.
 u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto id = opcode->get().GetId();
    const bool is_cbuf_form = id == OpCode::Id::HSET2_C;
    const bool is_imm_form = id == OpCode::Id::HSET2_IMM;
    const bool is_reg_form = id == OpCode::Id::HSET2_R;
    // The modifier bits live at different positions depending on the encoding.
    PredCondition cond{};
    bool bf = false;
    bool ftz = false;
    bool neg_a = false;
    bool abs_a = false;
    bool neg_b = false;
    bool abs_b = false;
    if (is_cbuf_form || is_imm_form) {
        cond = instr.hsetp2.cbuf_and_imm.cond;
        bf = instr.Bit(53);
        // NOTE(review): ftz and abs_b are both read from bit 54 in this encoding; confirm
        // against the ISA that one of them is not a typo.
        ftz = instr.Bit(54);
        neg_a = instr.Bit(43);
        abs_a = instr.Bit(44);
        neg_b = instr.Bit(56);
        abs_b = instr.Bit(54);
    } else if (is_reg_form) {
        cond = instr.hsetp2.reg.cond;
        bf = instr.Bit(49);
        ftz = instr.Bit(50);
        neg_a = instr.Bit(43);
        abs_a = instr.Bit(44);
        neg_b = instr.Bit(31);
        abs_b = instr.Bit(30);
    } else {
        UNREACHABLE();
    }
    // Fetch the raw B operand; it is fetched before operand A.
    Node op_b;
    if (is_cbuf_form) {
        // Inform as unimplemented as this is not tested.
        UNIMPLEMENTED_MSG("HSET2_C is not implemented");
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    } else if (is_reg_form) {
        op_b = GetRegister(instr.gpr20);
    } else if (is_imm_form) {
        op_b = UnpackHalfImmediate(instr, true);
    } else {
        UNREACHABLE();
        op_b = Node{};
    }
    // NOTE(review): DecodeHalfSetPredicate logs this same message when its ftz bit is set,
    // i.e. under the opposite condition; confirm which of the two is intended.
    if (!ftz) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }
    Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
    op_a = GetOperandAbsNegHalf(op_a, abs_a, neg_a);
    // Register form: abs/neg is applied before unpacking. The const buffer form is unpacked
    // only, and the immediate form is used as fetched.
    if (is_reg_form) {
        op_b = GetOperandAbsNegHalf(std::move(op_b), abs_b, neg_b);
    }
    if (is_reg_form || is_cbuf_form) {
        op_b = UnpackHalfFloat(std::move(op_b), instr.hset2.type_b);
    }
    Node second_pred = GetPredicate(instr.hset2.pred39, instr.hset2.neg_pred);
    Node comparison_pair = GetPredicateComparisonHalf(cond, op_a, op_b);
    const OperationCode combiner = GetPredicateCombiner(instr.hset2.op);
    // "True" pattern of one half: 0x3c00 (1.0 when read as an IEEE binary16 value) with bf set,
    // all ones otherwise.
    const u32 true_bits = bf ? 0x3c00 : 0xffff;
    // Builds the selected value for half `index` (0 = low 16 bits, 1 = high 16 bits).
    const auto select_half = [&](u32 index) -> Node {
        Node on_true = Immediate(true_bits << (index * 16));
        Node on_false = Immediate(0);
        Node picked = Operation(OperationCode::LogicalPick2, comparison_pair, Immediate(index));
        Node condition = Operation(combiner, picked, second_pred);
        return Operation(OperationCode::Select, condition, std::move(on_true),
                         std::move(on_false));
    };
    // HSET2 operates on each half float in the pack.
    const Node low_half = select_half(0);
    const Node high_half = select_half(1);
    Node packed = Operation(OperationCode::UBitwiseOr, low_half, high_half);
    SetRegister(bb, instr.gpr0, std::move(packed));
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@ -1,80 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 // Decodes the HSETP2 instruction stored at program_code[pc].
 // Compares the packed half operands and writes two predicates (pred3 and pred0), each combined
 // with predicate pred39. With h_and set, pred3 receives the AND of both half comparisons and
 // pred0 its negation; otherwise pred3 and pred0 receive the comparison of half 0 and half 1
 // respectively. Returns pc unchanged.
 u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // NOTE(review): the message says "without FTZ" but it is logged when the ftz bit is set;
    // DecodeHalfSet logs the same text when its ftz flag is clear. Confirm the intended
    // condition.
    if (instr.hsetp2.ftz != 0) {
        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
    }
    Node lhs = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
    lhs = GetOperandAbsNegHalf(lhs, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
    Tegra::Shader::PredCondition cond{};
    bool h_and{};
    Node rhs{};
    const auto id = opcode->get().GetId();
    if (id == OpCode::Id::HSETP2_C) {
        cond = instr.hsetp2.cbuf_and_imm.cond;
        h_and = instr.hsetp2.cbuf_and_imm.h_and;
        rhs = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                                   instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
        // F32 is hardcoded in hardware
        rhs = UnpackHalfFloat(std::move(rhs), Tegra::Shader::HalfType::F32);
    } else if (id == OpCode::Id::HSETP2_IMM) {
        cond = instr.hsetp2.cbuf_and_imm.cond;
        h_and = instr.hsetp2.cbuf_and_imm.h_and;
        rhs = UnpackHalfImmediate(instr, true);
    } else if (id == OpCode::Id::HSETP2_R) {
        cond = instr.hsetp2.reg.cond;
        h_and = instr.hsetp2.reg.h_and;
        // Register form: unpack first, then apply abs/neg.
        rhs = GetOperandAbsNegHalf(
            UnpackHalfFloat(GetRegister(instr.gpr20), instr.hsetp2.reg.type_b),
            instr.hsetp2.reg.abs_b, instr.hsetp2.reg.negate_b);
    } else {
        UNREACHABLE();
        rhs = Immediate(0);
    }
    const OperationCode combiner = GetPredicateCombiner(instr.hsetp2.op);
    const Node extra_pred = GetPredicate(instr.hsetp2.pred39, instr.hsetp2.neg_pred);
    // Stores `src OP extra_pred` into predicate `dest`.
    const auto write_predicate = [&](u64 dest, Node src) {
        SetPredicate(bb, dest, Operation(combiner, std::move(src), extra_pred));
    };
    const Node comparison = GetPredicateComparisonHalf(cond, lhs, rhs);
    const u64 primary = instr.hsetp2.pred3;
    const u64 secondary = instr.hsetp2.pred0;
    if (!h_and) {
        // One predicate per half of the comparison pair.
        write_predicate(primary,
                        Operation(OperationCode::LogicalPick2, comparison, Immediate(0U)));
        write_predicate(secondary,
                        Operation(OperationCode::LogicalPick2, comparison, Immediate(1U)));
        return pc;
    }
    // Both halves joined with AND; the secondary predicate receives the negation.
    Node joined = Operation(OperationCode::LogicalAnd2, comparison);
    write_predicate(primary, joined);
    write_predicate(secondary, Operation(OperationCode::LogicalNegate, std::move(joined)));
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/hfma2.cpp
+++ b/src/video_core/shader/decode/hfma2.cpp
@ -1,73 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <tuple>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::HalfPrecision;
 using Tegra::Shader::HalfType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes the HFMA2 instruction stored at program_code[pc].
 // Operand A is gpr8 unpacked with type_a. Operands B and C, their half types, their negate
 // flags and the saturate flag depend on the opcode variant. The HFma result is optionally
 // saturated, merged with the current contents of gpr0 according to the merge mode and written
 // back to gpr0. Returns pc unchanged.
 // An opcode that is not one of the four handled variants is reported through
 // UNIMPLEMENTED_MSG (as DecodeFfma does) and decoded with zero B and C operands.
 u32 ShaderIR::DecodeHfma2(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // The RR encoding keeps its precision field in a different place.
    if (opcode->get().GetId() == OpCode::Id::HFMA2_RR) {
        DEBUG_ASSERT(instr.hfma2.rr.precision == HalfPrecision::None);
    } else {
        DEBUG_ASSERT(instr.hfma2.precision == HalfPrecision::None);
    }
    constexpr auto identity = HalfType::H0_H1;
    bool neg_b = false;
    bool neg_c = false;
    bool saturate = false;
    HalfType type_b = identity;
    HalfType type_c = identity;
    Node op_b;
    Node op_c;
    // Operand B is always fetched before operand C.
    switch (opcode->get().GetId()) {
    case OpCode::Id::HFMA2_CR:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = HalfType::F32;
        op_b = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_RC:
        neg_b = instr.hfma2.negate_b;
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = instr.hfma2.type_reg39;
        op_b = GetRegister(instr.gpr39);
        type_c = HalfType::F32;
        op_c = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
        break;
    case OpCode::Id::HFMA2_RR:
        neg_b = instr.hfma2.rr.negate_b;
        neg_c = instr.hfma2.rr.negate_c;
        saturate = instr.hfma2.rr.saturate;
        type_b = instr.hfma2.type_b;
        op_b = GetRegister(instr.gpr20);
        type_c = instr.hfma2.rr.type_c;
        op_c = GetRegister(instr.gpr39);
        break;
    case OpCode::Id::HFMA2_IMM_R:
        // The immediate operand has no negate flag; neg_b stays false.
        neg_c = instr.hfma2.negate_c;
        saturate = instr.hfma2.saturate;
        type_b = identity;
        op_b = UnpackHalfImmediate(instr, true);
        type_c = instr.hfma2.type_reg39;
        op_c = GetRegister(instr.gpr39);
        break;
    default:
        // Previously this fell through silently; report it like the other decoders.
        UNIMPLEMENTED_MSG("Unhandled HFMA2 instruction: {}", opcode->get().GetName());
        op_b = Immediate(0);
        op_c = Immediate(0);
        break;
    }
    const Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hfma2.type_a);
    op_b = GetOperandAbsNegHalf(UnpackHalfFloat(op_b, type_b), false, neg_b);
    op_c = GetOperandAbsNegHalf(UnpackHalfFloat(op_c, type_c), false, neg_c);
    Node value = Operation(OperationCode::HFma, PRECISE, op_a, op_b, op_c);
    value = GetSaturatedHalfFloat(value, saturate);
    value = HalfMerge(GetRegister(instr.gpr0), value, instr.hfma2.merge);
    SetRegister(bb, instr.gpr0, value);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@ -1,536 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <vector>
 #include <fmt/format.h>
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/textures/texture.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::PredCondition;
 using Tegra::Shader::StoreType;
 using Tegra::Texture::ComponentType;
 using Tegra::Texture::TextureFormat;
 using Tegra::Texture::TICEntry;
 namespace {
 // Returns the component type stored in `descriptor` for the given component index.
 // The index is mapped to one of the descriptor's r/g/b/a types according to the channel order
 // of descriptor.format. An unknown format, or an index beyond the channels listed for the
 // format, is reported through UNIMPLEMENTED_MSG and yields ComponentType::FLOAT.
 ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                               std::size_t component) {
    const TextureFormat format{descriptor.format};
    const ComponentType r = descriptor.r_type;
    const ComponentType g = descriptor.g_type;
    const ComponentType b = descriptor.b_type;
    const ComponentType a = descriptor.a_type;
    // Channel order by component index; only the first `count` entries are meaningful.
    std::array<ComponentType, 4> order{};
    std::size_t count = 0;
    switch (format) {
    case TextureFormat::R16G16B16A16:
    case TextureFormat::R32G32B32A32:
    case TextureFormat::R32G32B32:
    case TextureFormat::R32G32:
    case TextureFormat::R16G16:
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        order = {r, g, b, a};
        count = 4;
        break;
    case TextureFormat::A8R8G8B8:
        order = {a, r, g, b};
        count = 4;
        break;
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        order = {a, b, g, r};
        count = 4;
        break;
    case TextureFormat::R32_B24G8:
        order = {r, b, g};
        count = 3;
        break;
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
    case TextureFormat::B10G11R11:
        order = {b, g, r};
        count = 3;
        break;
    case TextureFormat::R24G8:
    case TextureFormat::R8G24:
    case TextureFormat::R8G8:
    case TextureFormat::G4R4:
        order = {g, r};
        count = 2;
        break;
    default:
        break;
    }
    if (component < count) {
        return order[component];
    }
    UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
    return ComponentType::FLOAT;
 }
 bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 R = 0b0001;
    constexpr u8 G = 0b0010;
    constexpr u8 B = 0b0100;
    constexpr u8 A = 0b1000;
    constexpr std::array<u8, 16> mask = {
        0,   (R),     (G),     (R | G),     (B),     (R | B),     (G | B),     (R | G | B),
        (A), (R | A), (G | A), (R | G | A), (B | A), (R | B | A), (G | B | A), (R | G | B | A)};
    return std::bitset<4>{mask.at(component_mask)}.test(component);
 }
 // Returns the width in bits of component `component` of `format`.
 // Components are indexed in the same order GetComponentType uses. Formats with uniform channels
 // return their width for any index; most other formats return 0 for an index they do not
 // have. An unknown format is reported through UNIMPLEMENTED_MSG and yields 0.
 u32 GetComponentSize(TextureFormat format, std::size_t component) {
    // Looks the index up in a per-format width list; unlisted or out-of-range entries are 0.
    const auto per_channel = [component](const std::array<u32, 4>& bits) -> u32 {
        return component < bits.size() ? bits[component] : 0;
    };
    const bool is_one_to_three = component >= 1 && component <= 3;
    switch (format) {
    // Uniform formats: the same width for every index.
    case TextureFormat::R32G32B32A32:
        return 32;
    case TextureFormat::R16G16B16A16:
        return 16;
    case TextureFormat::A8R8G8B8:
        return 8;
    case TextureFormat::A4B4G4R4:
        return 4;
    // Formats with one odd channel: every index outside the listed range gets the odd width.
    case TextureFormat::A2B10G10R10:
        return is_one_to_three ? 10 : 2;
    case TextureFormat::A5B5G5R1:
        return component <= 2 ? 5 : 1;
    case TextureFormat::A1B5G5R5:
        return is_one_to_three ? 5 : 1;
    // Remaining formats: explicit width per index, 0 past the last channel.
    case TextureFormat::R32G32B32:
        return per_channel({32, 32, 32});
    case TextureFormat::R32G32:
        return per_channel({32, 32});
    case TextureFormat::R16G16:
        return per_channel({16, 16});
    case TextureFormat::R32:
        return per_channel({32});
    case TextureFormat::R16:
        return per_channel({16});
    case TextureFormat::R8:
        return per_channel({8});
    case TextureFormat::R1:
        return per_channel({1});
    case TextureFormat::R32_B24G8:
        return per_channel({32, 24, 8});
    case TextureFormat::B5G6R5:
        return per_channel({5, 6, 5});
    case TextureFormat::B6G5R5:
        return per_channel({6, 5, 5});
    case TextureFormat::B10G11R11:
        return per_channel({10, 11, 11});
    case TextureFormat::R24G8:
        return per_channel({8, 24});
    case TextureFormat::R8G24:
        return per_channel({24, 8});
    case TextureFormat::R8G8:
        return per_channel({8, 8});
    case TextureFormat::G4R4:
        return per_channel({4, 4});
    default:
        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
        return 0;
    }
 }
 // Returns the component mask of `format`, with bit 0 = R, bit 1 = G, bit 2 = B, bit 3 = A.
 // A format with N channels yields the lowest N bits set. An unknown format is reported through
 // UNIMPLEMENTED_MSG and treated as having all four channels.
 std::size_t GetImageComponentMask(TextureFormat format) {
    std::size_t channel_count = 0;
    switch (format) {
    case TextureFormat::R32G32B32A32:
    case TextureFormat::R16G16B16A16:
    case TextureFormat::A8R8G8B8:
    case TextureFormat::A2B10G10R10:
    case TextureFormat::A4B4G4R4:
    case TextureFormat::A5B5G5R1:
    case TextureFormat::A1B5G5R5:
        channel_count = 4;
        break;
    case TextureFormat::R32G32B32:
    case TextureFormat::R32_B24G8:
    case TextureFormat::B5G6R5:
    case TextureFormat::B6G5R5:
    case TextureFormat::B10G11R11:
        channel_count = 3;
        break;
    case TextureFormat::R32G32:
    case TextureFormat::R16G16:
    case TextureFormat::R24G8:
    case TextureFormat::R8G24:
    case TextureFormat::R8G8:
    case TextureFormat::G4R4:
        channel_count = 2;
        break;
    case TextureFormat::R32:
    case TextureFormat::R16:
    case TextureFormat::R8:
    case TextureFormat::R1:
        channel_count = 1;
        break;
    default:
        UNIMPLEMENTED_MSG("Texture format not implemented={}", format);
        channel_count = 4;
        break;
    }
    // Lowest channel_count bits set: 0b0001, 0b0011, 0b0111 or 0b1111.
    return (std::size_t{1} << channel_count) - 1;
 }
 // Returns how many coordinate values an image of the given type takes: 1 for 1D textures and
 // texture buffers, 2 for 1D arrays and 2D textures, 3 for 2D arrays and 3D textures.
 // Any other value hits UNREACHABLE() and yields 1.
 std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    using Tegra::Shader::ImageType;
    if (image_type == ImageType::Texture1D || image_type == ImageType::TextureBuffer) {
        return 1;
    }
    if (image_type == ImageType::Texture1DArray || image_type == ImageType::Texture2D) {
        return 2;
    }
    if (image_type == ImageType::Texture2DArray || image_type == ImageType::Texture3D) {
        return 3;
    }
    UNREACHABLE();
    return 1;
 }
 } // Anonymous namespace
 // Converts `original_value` into the representation selected by `component_type` for a
 // component that is `component_size` bits wide.
 // Returns the converted node and a flag. The flag is true for SNORM, SINT, FLOAT and unhandled
 // types, and false for UNORM and UINT (the SINT/UNORM path names it is_signed).
 // Unhandled component types are reported through UNIMPLEMENTED_MSG and passed through unchanged.
 std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size,
                                                   Node original_value) {
    // 2^bits as a float. The shift is done on a 64-bit unsigned operand: on a plain int a
    // 31-bit size shifts into the sign bit (negative scale) and a 32-bit size is undefined
    // behaviour. Sizes are assumed to stay below 64 - TODO confirm against callers.
    const auto pow2 = [](u32 bits) { return static_cast<float>(1ULL << bits); };
    switch (component_type) {
    case ComponentType::SNORM: {
        // range [-1.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(pow2(component_size) / 2.f - 1.f));
        cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value));
        // Keep only the low component_size bits of the converted integer.
        return {BitfieldExtract(std::move(cnv_value), 0, component_size), true};
    }
    case ComponentType::SINT:
    case ComponentType::UNORM: {
        bool is_signed = component_type == ComponentType::SINT;
        // range [0.0, 1.0]
        auto cnv_value = Operation(OperationCode::FMul, original_value,
                                   Immediate(pow2(component_size) - 1.f));
        return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)),
                is_signed};
    }
    case ComponentType::UINT: // range [0, (1 << component_size) - 1]
        return {std::move(original_value), false};
    case ComponentType::FLOAT:
        // 16-bit float components go through HCastFloat; other widths pass through as is.
        if (component_size == 16) {
            return {Operation(OperationCode::HCastFloat, original_value), true};
        } else {
            return {std::move(original_value), true};
        }
    default:
        UNIMPLEMENTED_MSG("Unimplemented component type={}", component_type);
        return {std::move(original_value), true};
    }
 }
 /// Decodes the surface/image instruction at pc (SULD, SUST or SUATOM) and appends the
 /// resulting IR to bb. Returns pc unchanged.
 u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // Gathers the coordinate operands: one register per coordinate, consecutive from gpr8.
    const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) {
        std::vector<Node> coords;
        const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)};
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }
        return coords;
    };
    switch (opcode->get().GetId()) {
    case OpCode::Id::SULD: {
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);
        const auto type{instr.suldst.image_type};
        // The image is addressed either by an immediate index or through a register (bindless).
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkRead();
        if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) {
            // One image load per enabled component. Results are staged in temporaries and only
            // then copied to consecutive destination registers starting at gpr0.
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.suldst.IsComponentEnabled(element)) {
                    continue;
                }
                MetaImage meta{image, {}, element};
                Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type));
                SetTemporary(bb, indexer++, std::move(value));
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
        } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) {
            UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 &&
                             instr.suldst.GetStoreDataLayout() != StoreType::Bits64);
            // Look up the descriptor of the accessed image; its format drives the packing below.
            auto descriptor = [this, instr] {
                std::optional<Tegra::Engines::SamplerDescriptor> sampler_descriptor;
                if (instr.suldst.is_immediate) {
                    sampler_descriptor =
                        registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value()));
                } else {
                    const Node image_register = GetRegister(instr.gpr39);
                    const auto result = TrackCbuf(image_register, global_code,
                                                  static_cast<s64>(global_code.size()));
                    const auto buffer = std::get<1>(result);
                    const auto offset = std::get<2>(result);
                    sampler_descriptor = registry.ObtainBindlessSampler(buffer, offset);
                }
                if (!sampler_descriptor) {
                    UNREACHABLE_MSG("Failed to obtain image descriptor");
                }
                // NOTE(review): if UNREACHABLE_MSG returns, this dereferences an empty optional.
                return *sampler_descriptor;
            }();
            const auto comp_mask = GetImageComponentMask(descriptor.format);
            switch (instr.suldst.GetStoreDataLayout()) {
            case StoreType::Bits32:
            case StoreType::Bits64: {
                // indexer: number of packed 32-bit words emitted so far.
                // shifted_counter: number of bits already occupied in the word being built.
                u32 indexer = 0;
                u32 shifted_counter = 0;
                Node value = Immediate(0);
                for (u32 element = 0; element < 4; ++element) {
                    if (!IsComponentEnabled(comp_mask, element)) {
                        continue;
                    }
                    const auto component_type = GetComponentType(descriptor, element);
                    const auto component_size = GetComponentSize(descriptor.format, element);
                    MetaImage meta{image, {}, element};
                    auto [converted_value, is_signed] = GetComponentValue(
                        component_type, component_size,
                        Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)));
                    // shift element to correct position
                    const auto shifted = shifted_counter;
                    if (shifted > 0) {
                        converted_value =
                            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed,
                                            std::move(converted_value), Immediate(shifted));
                    }
                    shifted_counter += component_size;
                    // add value into result
                    value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value));
                    // once 32 bits (one register) have been filled, save the word into a temporary
                    if (shifted_counter >= 32) {
                        SetTemporary(bb, indexer++, std::move(value));
                        // reset counter and value to start packing the next word
                        value = Immediate(0);
                        shifted_counter = 0;
                    }
                }
                // NOTE(review): bits accumulated after the last full word are never written out,
                // so formats whose components add up to fewer than 32 bits produce no output
                // here - confirm this is intended.
                for (u32 i = 0; i < indexer; ++i) {
                    SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
                }
                break;
            }
            default:
                UNREACHABLE();
                break;
            }
        }
        break;
    }
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store !=
                         Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA
        // The four values to store are read from consecutive registers starting at gpr0.
        std::vector<Node> values;
        constexpr std::size_t hardcoded_size{4};
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }
        const auto type{instr.suldst.image_type};
        auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type)
                                              : GetBindlessImage(instr.gpr39, type)};
        image.MarkWrite();
        MetaImage meta{image, std::move(values)};
        bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type)));
        break;
    }
    case OpCode::Id::SUATOM: {
        UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0);
        // Only S32/U32 Add, And, Or, Xor and Exch are mapped; anything else is reported as
        // unimplemented and decoded as an atomic add.
        const OperationCode operation_code = [instr] {
            switch (instr.suatom_d.operation_type) {
            case Tegra::Shader::ImageAtomicOperationType::S32:
            case Tegra::Shader::ImageAtomicOperationType::U32:
                switch (instr.suatom_d.operation) {
                case Tegra::Shader::ImageAtomicOperation::Add:
                    return OperationCode::AtomicImageAdd;
                case Tegra::Shader::ImageAtomicOperation::And:
                    return OperationCode::AtomicImageAnd;
                case Tegra::Shader::ImageAtomicOperation::Or:
                    return OperationCode::AtomicImageOr;
                case Tegra::Shader::ImageAtomicOperation::Xor:
                    return OperationCode::AtomicImageXor;
                case Tegra::Shader::ImageAtomicOperation::Exch:
                    return OperationCode::AtomicImageExchange;
                default:
                    break;
                }
                break;
            default:
                break;
            }
            UNIMPLEMENTED_MSG("Unimplemented operation={}, type={}",
                              static_cast<u64>(instr.suatom_d.operation.Value()),
                              static_cast<u64>(instr.suatom_d.operation_type.Value()));
            return OperationCode::AtomicImageAdd;
        }();
        // gpr0 supplies the operand and also receives the result of the atomic operation.
        Node value = GetRegister(instr.gpr0);
        const auto type = instr.suatom_d.image_type;
        // This path always resolves the image through the immediate index.
        auto& image = GetImage(instr.image, type);
        image.MarkAtomic();
        MetaImage meta{image, {std::move(value)}};
        SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type)));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 /// Returns the tracked image entry for an immediate image index, registering it on first use.
 ImageEntry& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const u32 image_offset = static_cast<u32>(image.index.Value());
    for (auto& known : used_images) {
        if (known.offset != image_offset) {
            continue;
        }
        // The first entry with this offset is reused; it has to be a bound image of this type.
        ASSERT(!known.is_bindless && known.type == type);
        return known;
    }
    // Not seen before: the new entry's index is its position in the list of used images.
    const u32 new_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(new_index, image_offset, type);
 }
 /// Returns the tracked image entry for an image handle held in a register, registering it on
 /// first use. The handle is traced back to the constant buffer location it was read from.
 ImageEntry& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
    const Node handle = GetRegister(reg);
    const auto tracked = TrackCbuf(handle, global_code, static_cast<s64>(global_code.size()));
    const auto cbuf_index = std::get<1>(tracked);
    const auto cbuf_offset = std::get<2>(tracked);
    for (auto& known : used_images) {
        const bool same_location = known.buffer == cbuf_index && known.offset == cbuf_offset;
        if (!same_location) {
            continue;
        }
        // The first entry at this location is reused; it has to be bindless and of this type.
        ASSERT(known.is_bindless && known.type == type);
        return known;
    }
    // Not seen before: the new entry's index is its position in the list of used images.
    const u32 new_index = static_cast<u32>(used_images.size());
    return used_images.emplace_back(new_index, cbuf_offset, cbuf_index, type);
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set.cpp
+++ b/src/video_core/shader/decode/integer_set.cpp
@ -1,49 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 /// Decodes the integer set instruction at pc: compares two integer operands, combines the
 /// outcome with a predicate and writes a true/false constant to gpr0. Returns pc unchanged.
 u32 ShaderIR::DecodeIntegerSet(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const Node lhs = GetRegister(instr.gpr8);
    // Operand B is an immediate, a register or a constant buffer entry, checked in that order.
    Node rhs;
    if (instr.is_b_imm) {
        rhs = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    const Node extra_pred = GetPredicate(instr.iset.pred39, instr.iset.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.iset.cond, instr.iset.is_signed, lhs, rhs);
    const Node condition =
        Operation(GetPredicateCombiner(instr.iset.op), comparison, extra_pred);
    // With the bf bit set the result is the float 1.0 / 0.0, otherwise the integer -1 / 0.
    Node when_true;
    Node when_false;
    if (instr.iset.bf) {
        when_true = Immediate(1.0f);
        when_false = Immediate(0.0f);
    } else {
        when_true = Immediate(-1);
        when_false = Immediate(0);
    }
    SetRegister(bb, instr.gpr0,
                Operation(OperationCode::Select, PRECISE, condition, when_true, when_false));
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/integer_set_predicate.cpp
+++ b/src/video_core/shader/decode/integer_set_predicate.cpp
@ -1,53 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 /// Decodes the integer set-predicate instruction at pc: compares two integer operands and
 /// writes the combined result to pred3 and, when enabled, its counterpart to pred0.
 /// Returns pc unchanged.
 u32 ShaderIR::DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const Node lhs = GetRegister(instr.gpr8);
    // Operand B is an immediate, a register or a constant buffer entry, checked in that order.
    Node rhs;
    if (instr.is_b_imm) {
        rhs = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        rhs = GetRegister(instr.gpr20);
    } else {
        rhs = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }
    // The constant predicate cannot be used as the destination.
    ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
    const Node extra_pred = GetPredicate(instr.isetp.pred39, instr.isetp.neg_pred != 0);
    const Node comparison =
        GetPredicateComparisonInteger(instr.isetp.cond, instr.isetp.is_signed, lhs, rhs);
    const OperationCode combine_op = GetPredicateCombiner(instr.isetp.op);
    // Primary destination: comparison OP extra_pred.
    SetPredicate(bb, instr.isetp.pred3, Operation(combine_op, comparison, extra_pred));
    const bool has_secondary = instr.isetp.pred0 != static_cast<u64>(Pred::UnusedIndex);
    if (has_secondary) {
        // Secondary destination: !comparison OP extra_pred.
        const Node inverted = Operation(OperationCode::LogicalNegate, comparison);
        SetPredicate(bb, instr.isetp.pred0, Operation(combine_op, inverted, extra_pred));
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@ -1,493 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <utility>
 #include <vector>
 #include <fmt/format.h>
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::AtomicOp;
 using Tegra::Shader::AtomicType;
 using Tegra::Shader::Attribute;
 using Tegra::Shader::GlobalAtomicType;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 using Tegra::Shader::StoreType;
 namespace {
 /// Maps an instruction-level atomic operation onto the matching integer atomic IR operation.
 /// Operations without a mapping are reported as unimplemented and decoded as an atomic add.
 OperationCode GetAtomOperation(AtomicOp op) {
    if (op == AtomicOp::Add) {
        return OperationCode::AtomicIAdd;
    }
    if (op == AtomicOp::Min) {
        return OperationCode::AtomicIMin;
    }
    if (op == AtomicOp::Max) {
        return OperationCode::AtomicIMax;
    }
    if (op == AtomicOp::And) {
        return OperationCode::AtomicIAnd;
    }
    if (op == AtomicOp::Or) {
        return OperationCode::AtomicIOr;
    }
    if (op == AtomicOp::Xor) {
        return OperationCode::AtomicIXor;
    }
    if (op == AtomicOp::Exch) {
        return OperationCode::AtomicIExchange;
    }
    UNIMPLEMENTED_MSG("op={}", op);
    return OperationCode::AtomicIAdd;
 }
 /// Tells whether the access type is an unsigned byte or unsigned short, the two types that
 /// are treated as sub-word accesses by the global memory decoders.
 bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::UnsignedByte:
    case Tegra::Shader::UniformType::UnsignedShort:
        return true;
    default:
        return false;
    }
 }
 /// Returns the address bit mask used to locate a sub-word access inside its 32-bit word:
 /// 0b11 for bytes and 0b10 for shorts. Only valid for types accepted by IsUnaligned.
 u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
    using UniformType = Tegra::Shader::UniformType;
    if (uniform_type == UniformType::UnsignedByte) {
        return 0b11;
    }
    if (uniform_type == UniformType::UnsignedShort) {
        return 0b10;
    }
    UNREACHABLE();
    return 0;
 }
 /// Returns the size in bits of a memory access of the given type. Types without a known size
 /// are reported as unimplemented and treated as 32 bits wide.
 u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
    using UniformType = Tegra::Shader::UniformType;
    if (uniform_type == UniformType::UnsignedByte) {
        return 8;
    }
    if (uniform_type == UniformType::UnsignedShort) {
        return 16;
    }
    if (uniform_type == UniformType::Single) {
        return 32;
    }
    if (uniform_type == UniformType::Double) {
        return 64;
    }
    if (uniform_type == UniformType::Quad || uniform_type == UniformType::UnsignedQuad) {
        return 128;
    }
    UNIMPLEMENTED_MSG("Unimplemented size={}!", uniform_type);
    return 32;
 }
 /// Builds the IR that extracts a `size`-bit field from `value`. The field's bit position is
 /// (address & mask) * 8, i.e. the masked address bits converted from bytes to bits.
 Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    Node byte_index = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
    // A left shift by 3 turns the byte index into a bit index.
    Node bit_index =
        Operation(OperationCode::ULogicalShiftLeft, move(byte_index), Immediate(3));
    return Operation(OperationCode::UBitfieldExtract, move(value), move(bit_index),
                     Immediate(size));
 }
 /// Builds the IR that inserts the low `size` bits of `value` into `dest`. The field's bit
 /// position is (address & mask) * 8, i.e. the masked address bits converted from bytes to bits.
 Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
    Node byte_index = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
    // A left shift by 3 turns the byte index into a bit index.
    Node bit_index =
        Operation(OperationCode::ULogicalShiftLeft, move(byte_index), Immediate(3));
    return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(bit_index),
                     Immediate(size));
 }
 /// Builds the IR that sign-extends a 16-bit value: when bit 15 is set, the upper 16 bits of
 /// the result are set as well; otherwise the value is passed through.
 Node Sign16Extend(Node value) {
    Node sign_bit = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
    Node is_negative =
        Operation(OperationCode::LogicalUEqual, move(sign_bit), Immediate(1U << 15));
    // Either all ones or all zeros for the upper half, selected by the sign test.
    Node upper_half =
        Operation(OperationCode::Select, is_negative, Immediate(0xFFFF0000), Immediate(0));
    return Operation(OperationCode::UBitwiseOr, move(value), move(upper_half));
 }
 } // Anonymous namespace
 /// Decodes the memory instruction at pc (attribute, constant buffer, local, shared and global
 /// loads/stores, atomics and AL2P) and appends the resulting IR to bb. Returns pc unchanged.
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    switch (opcode->get().GetId()) {
    case OpCode::Id::LD_A: {
        // Note: Shouldn't this be interp mode flat? As in no interpolation made.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG(instr.attribute.fmt20.IsPhysical() &&
                                 instr.attribute.fmt20.size != Tegra::Shader::AttributeSize::Word,
                             "Non-32 bits PHYS reads are not implemented");
        const Node buffer{GetRegister(instr.gpr39)};
        // Running position inside the attribute space: element within a vec4 and attribute index.
        u64 next_element = instr.attribute.fmt20.element;
        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
        const auto LoadNextElement = [&](u32 reg_offset) {
            const Node attribute{instr.attribute.fmt20.IsPhysical()
                                     ? GetPhysicalInputAttribute(instr.gpr8, buffer)
                                     : GetInputAttribute(static_cast<Attribute::Index>(next_index),
                                                         next_element, buffer)};
            SetRegister(bb, instr.gpr0.Value() + reg_offset, attribute);
            // Load the next attribute element into the following register. If the element
            // to load goes beyond the vec4 size, load the first element of the next
            // attribute.
            next_element = (next_element + 1) % 4;
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };
        // The encoded size field plus one is used as the number of 32-bit words to load.
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            LoadNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::LD_C: {
        UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
        // Indirect constant buffer read: the register in gpr8 is added to the encoded offset.
        Node index = GetRegister(instr.gpr8);
        const Node op_a =
            GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
        switch (instr.ld_c.type.Value()) {
        case Tegra::Shader::UniformType::Single:
            SetRegister(bb, instr.gpr0, op_a);
            break;
        case Tegra::Shader::UniformType::Double: {
            const Node op_b =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 4, index);
            // Both words go through temporaries before the destination registers are written,
            // presumably so a destination that aliases the index register cannot disturb the
            // second read.
            SetTemporary(bb, 0, op_a);
            SetTemporary(bb, 1, op_b);
            SetRegister(bb, instr.gpr0, GetTemporary(0));
            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporary(1));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled type: {}", instr.ld_c.type.Value());
        }
        break;
    }
    case OpCode::Id::LD_L:
        LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", instr.ld_l.unknown);
        [[fallthrough]];
    case OpCode::Id::LD_S: {
        // Local (LD_L) and shared (LD_S) loads share this path; only the accessor differs.
        const auto GetAddress = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
        };
        const auto GetMemory = [&](s32 offset) {
            return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
                                                             : GetLocalMemory(GetAddress(offset));
        };
        switch (instr.ldst_sl.type.Value()) {
        case StoreType::Signed16:
            // Pick the addressed 16-bit half out of the loaded word and sign-extend it.
            SetRegister(bb, instr.gpr0,
                        Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
            break;
        case StoreType::Bits32:
        case StoreType::Bits64:
        case StoreType::Bits128: {
            // Number of 32-bit words to transfer.
            const u32 count = [&] {
                switch (instr.ldst_sl.type.Value()) {
                case StoreType::Bits32:
                    return 1;
                case StoreType::Bits64:
                    return 2;
                case StoreType::Bits128:
                    return 4;
                default:
                    UNREACHABLE();
                    return 0;
                }
            }();
            // All words are read into temporaries first and copied to registers afterwards.
            for (u32 i = 0; i < count; ++i) {
                SetTemporary(bb, i, GetMemory(i * 4));
            }
            for (u32 i = 0; i < count; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("{} Unhandled type: {}", opcode->get().GetName(),
                              instr.ldst_sl.type.Value());
        }
        break;
    }
    case OpCode::Id::LD:
    case OpCode::Id::LDG: {
        // The access type lives in a different instruction field for LD and LDG.
        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
            switch (opcode->get().GetId()) {
            case OpCode::Id::LD:
                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
                return instr.generic.type;
            case OpCode::Id::LDG:
                return instr.ldg.type;
            default:
                UNREACHABLE();
                return {};
            }
        }();
        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, false);
        const u32 size = GetMemorySize(type);
        // Number of 32-bit registers covered by the access (sub-word accesses still use one).
        const u32 count = Common::AlignUp(size, 32) / 32;
        if (!real_address_base || !base_address) {
            // Tracking failed, load zeroes.
            for (u32 i = 0; i < count; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
            }
            break;
        }
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
            // To handle unaligned loads get the bytes used to dereference global memory and extract
            // those bytes from the loaded u32.
            if (IsUnaligned(type)) {
                gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
            }
            SetTemporary(bb, i, gmem);
        }
        for (u32 i = 0; i < count; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
    case OpCode::Id::ST_A: {
        // NOTE(review): the two messages below say "loads" although this is the store path.
        UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
                             "Indirect attribute loads are not supported");
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");
        u64 element = instr.attribute.fmt20.element;
        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            Node dest;
            if (instr.attribute.fmt20.patch) {
                // Patch outputs are addressed by a flat offset: four elements per index.
                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
                dest = MakeNode<PatchNode>(offset);
            } else {
                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
                                          GetRegister(instr.gpr39));
            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
            bb.push_back(Operation(OperationCode::Assign, dest, src));
            // Store the following register into the next attribute element. If the element goes
            // beyond the vec4 size, continue with the first element of the next attribute.
            element = (element + 1) % 4;
            index = index + (element == 0 ? 1 : 0);
        }
        break;
    }
    case OpCode::Id::ST_L:
        LOG_DEBUG(HW_GPU, "ST_L cache management mode: {}", instr.st_l.cache_management.Value());
        [[fallthrough]];
    case OpCode::Id::ST_S: {
        // Local (ST_L) and shared (ST_S) stores share this path; the accessors are selected
        // below through member function pointers.
        const auto GetAddress = [&](s32 offset) {
            ASSERT(offset % 4 == 0);
            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
        };
        const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
        const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
        const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
        switch (instr.ldst_sl.type.Value()) {
        // The wide cases fall through to the narrower ones, so words are written from the
        // highest offset down to offset 0.
        case StoreType::Bits128:
            (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
            (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
            [[fallthrough]];
        case StoreType::Bits64:
            (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
            [[fallthrough]];
        case StoreType::Bits32:
            (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
            break;
        case StoreType::Unsigned16:
        case StoreType::Signed16: {
            // Read-modify-write: merge the 16-bit value into the word currently in memory.
            Node address = GetAddress(0);
            Node memory = (this->*get_memory)(address);
            (this->*set_memory)(
                bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
            break;
        }
        default:
            UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
                              instr.ldst_sl.type.Value());
        }
        break;
    }
    case OpCode::Id::ST:
    case OpCode::Id::STG: {
        // The access type lives in a different instruction field for ST and STG.
        const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
            switch (opcode->get().GetId()) {
            case OpCode::Id::ST:
                UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
                return instr.generic.type;
            case OpCode::Id::STG:
                return instr.stg.type;
            default:
                UNREACHABLE();
                return {};
            }
        }();
        // For unaligned reads we have to read memory too.
        const bool is_read = IsUnaligned(type);
        const auto [real_address_base, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, is_read, true);
        if (!real_address_base || !base_address) {
            // Tracking failed, skip the store.
            break;
        }
        const u32 size = GetMemorySize(type);
        // Number of 32-bit registers covered by the access (sub-word accesses still use one).
        const u32 count = Common::AlignUp(size, 32) / 32;
        for (u32 i = 0; i < count; ++i) {
            const Node it_offset = Immediate(i * 4);
            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
            const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
            Node value = GetRegister(instr.gpr0.Value() + i);
            if (IsUnaligned(type)) {
                // Sub-word store: merge the value into the word read from global memory.
                const u32 mask = GetUnalignedMask(type);
                value = InsertUnaligned(gmem, move(value), real_address, mask, size);
            }
            bb.push_back(Operation(OperationCode::Assign, gmem, value));
        }
        break;
    }
    case OpCode::Id::RED: {
        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
                             instr.red.type.Value());
        const auto [real_address, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, true);
        if (!real_address || !base_address) {
            // Tracking failed, skip atomic.
            break;
        }
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        Node value = GetRegister(instr.gpr0);
        // The atomic operation is appended to the block; its result is not written to a register.
        bb.push_back(Operation(GetAtomOperation(instr.red.operation), move(gmem), move(value)));
        break;
    }
    case OpCode::Id::ATOM: {
        UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
                                 instr.atom.operation == AtomicOp::Dec ||
                                 instr.atom.operation == AtomicOp::SafeAdd,
                             "operation={}", instr.atom.operation.Value());
        UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
                                 instr.atom.type == GlobalAtomicType::U64 ||
                                 instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
                                 instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
                             "type={}", instr.atom.type.Value());
        const auto [real_address, base_address, descriptor] =
            TrackGlobalMemory(bb, instr, true, true);
        if (!real_address || !base_address) {
            // Tracking failed, skip atomic.
            break;
        }
        const bool is_signed =
            instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
        Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
        // Global memory atomic: the operand comes from gpr20 and the result is written to gpr0.
        SetRegister(bb, instr.gpr0,
                    SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
                                    GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::ATOMS: {
        UNIMPLEMENTED_IF_MSG(instr.atoms.operation == AtomicOp::Inc ||
                                 instr.atoms.operation == AtomicOp::Dec,
                             "operation={}", instr.atoms.operation.Value());
        UNIMPLEMENTED_IF_MSG(instr.atoms.type == AtomicType::S64 ||
                                 instr.atoms.type == AtomicType::U64,
                             "type={}", instr.atoms.type.Value());
        const bool is_signed =
            instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
        // Shared memory atomic: the address is gpr8 plus the instruction's immediate offset.
        const s32 offset = instr.atoms.GetImmediateOffset();
        Node address = GetRegister(instr.gpr8);
        address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
        SetRegister(bb, instr.gpr0,
                    SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
                                    GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
        break;
    }
    case OpCode::Id::AL2P: {
        // Ignore al2p.direction since we don't care about it.
        // Calculate emulation fake physical address.
        const Node fixed_address{Immediate(static_cast<u32>(instr.al2p.address))};
        const Node reg{GetRegister(instr.gpr8)};
        const Node fake_address{Operation(OperationCode::IAdd, NO_PRECISE, reg, fixed_address)};
        // Set the fake address to target register.
        SetRegister(bb, instr.gpr0, fake_address);
        // Signal the shader IR to declare all possible attributes and varyings
        uses_physical_attributes = true;
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled memory instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 /**
  * Resolves the global memory region accessed by an instruction by tracing its address register
  * back to a constant buffer location, and records how that region is used.
  *
  * @param bb       Block that receives a comment naming the tracked base address.
  * @param instr    Instruction whose gmem register and offset fields describe the access.
  * @param is_read  Marks the region as read.
  * @param is_write Marks the region as written.
  * @return The effective address (address register plus immediate offset), the tracked base
  *         address node and the region descriptor. When tracking fails, the assertion handler
  *         returns two null nodes and a default descriptor.
  */
 std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
                                                                      Instruction instr,
                                                                      bool is_read, bool is_write) {
    const Node address_reg = GetRegister(instr.gmem.gpr);
    const u32 encoded_offset = static_cast<u32>(instr.gmem.offset);
    const auto [base_address, index, offset] =
        TrackCbuf(address_reg, global_code, static_cast<s64>(global_code.size()));
    ASSERT_OR_EXECUTE_MSG(
        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
        "Global memory tracking failed");
    bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
    const GlobalMemoryBase descriptor{index, offset};
    // Fetch the usage record of this region, creating it on first use, and accumulate flags.
    auto& usage = used_global_memory.try_emplace(descriptor).first->second;
    usage.is_read |= is_read;
    usage.is_written |= is_write;
    const auto effective_address =
        Operation(OperationCode::UAdd, NO_PRECISE, Immediate(encoded_offset), address_reg);
    return {effective_address, base_address, descriptor};
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@ -1,322 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::PixelImap;
 using Tegra::Shader::Register;
 using Tegra::Shader::SystemVariable;
 using Index = Tegra::Shader::Attribute::Index;
 // Decodes the instruction at `pc` for the opcodes handled below (NOP, EXIT, KIL, S2R, BRA, BRX,
 // SSY, PBK, SYNC, BRK, IPA, OUT_R, ISBERD, BAR, MEMBAR and DEPBAR) and appends the resulting IR
 // nodes to `bb`. Unsupported encodings are reported through the UNIMPLEMENTED_* macros.
 // Returns `pc`, which is replaced by MAX_PROGRAM_LENGTH - 1 after an unpredicated EXIT.
 u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    switch (opcode->get().GetId()) {
    case OpCode::Id::NOP: {
        UNIMPLEMENTED_IF(instr.nop.cc != Tegra::Shader::ConditionCode::T);
        UNIMPLEMENTED_IF(instr.nop.trigger != 0);
        // With the previous preconditions, this instruction is a no-operation.
        break;
    }
    case OpCode::Id::EXIT: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "EXIT condition code used: {}", cc);
        switch (instr.flow.cond) {
        case Tegra::Shader::FlowCondition::Always:
            bb.push_back(Operation(OperationCode::Exit));
            if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                // If this is an unconditional exit then just end processing here,
                // otherwise we have to account for the possibility of the condition
                // not being met, so continue processing the next instruction.
                pc = MAX_PROGRAM_LENGTH - 1;
            }
            break;
        case Tegra::Shader::FlowCondition::Fcsm_Tr:
            // TODO(bunnei): What is this used for? If we assume this condition is not
            // satisfied, dual vertex shaders in Farming Simulator make more sense
            UNIMPLEMENTED_MSG("Skipping unknown FlowCondition::Fcsm_Tr");
            break;
        default:
            UNIMPLEMENTED_MSG("Unhandled flow condition: {}", instr.flow.cond.Value());
        }
        break;
    }
    case OpCode::Id::KIL: {
        // Only the unconditional form is supported; it is emitted as a discard.
        UNIMPLEMENTED_IF(instr.flow.cond != Tegra::Shader::FlowCondition::Always);
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "KIL condition code used: {}", cc);
        bb.push_back(Operation(OperationCode::Discard));
        break;
    }
    case OpCode::Id::S2R: {
        // Build the node for the system value selected by sys20; it is stored in gpr0 below.
        const Node value = [this, instr] {
            switch (instr.sys20) {
            case SystemVariable::LaneId:
                return Operation(OperationCode::ThreadId);
            case SystemVariable::InvocationId:
                return Operation(OperationCode::InvocationId);
            case SystemVariable::Ydirection:
                uses_y_negate = true;
                return Operation(OperationCode::YNegate);
            case SystemVariable::InvocationInfo:
                // Stubbed: a fixed constant is returned instead of the real value.
                LOG_WARNING(HW_GPU, "S2R instruction with InvocationInfo is incomplete");
                return Immediate(0x00ff'0000U);
            case SystemVariable::WscaleFactorXY:
                UNIMPLEMENTED_MSG("S2R WscaleFactorXY is not implemented");
                return Immediate(0U);
            case SystemVariable::WscaleFactorZ:
                UNIMPLEMENTED_MSG("S2R WscaleFactorZ is not implemented");
                return Immediate(0U);
            case SystemVariable::Tid: {
                // Pack the three local invocation IDs into one word:
                // X at offset 0 (9 bits), Y at offset 16 (9 bits), Z at offset 26 (5 bits).
                Node val = Immediate(0);
                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdX), 0, 9);
                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdY), 16, 9);
                val = BitfieldInsert(val, Operation(OperationCode::LocalInvocationIdZ), 26, 5);
                return val;
            }
            case SystemVariable::TidX:
                return Operation(OperationCode::LocalInvocationIdX);
            case SystemVariable::TidY:
                return Operation(OperationCode::LocalInvocationIdY);
            case SystemVariable::TidZ:
                return Operation(OperationCode::LocalInvocationIdZ);
            case SystemVariable::CtaIdX:
                return Operation(OperationCode::WorkGroupIdX);
            case SystemVariable::CtaIdY:
                return Operation(OperationCode::WorkGroupIdY);
            case SystemVariable::CtaIdZ:
                return Operation(OperationCode::WorkGroupIdZ);
            case SystemVariable::EqMask:
            case SystemVariable::LtMask:
            case SystemVariable::LeMask:
            case SystemVariable::GtMask:
            case SystemVariable::GeMask:
                // Every thread mask read flags the shader as using warp operations before the
                // specific mask node is selected.
                uses_warps = true;
                switch (instr.sys20) {
                case SystemVariable::EqMask:
                    return Operation(OperationCode::ThreadEqMask);
                case SystemVariable::LtMask:
                    return Operation(OperationCode::ThreadLtMask);
                case SystemVariable::LeMask:
                    return Operation(OperationCode::ThreadLeMask);
                case SystemVariable::GtMask:
                    return Operation(OperationCode::ThreadGtMask);
                case SystemVariable::GeMask:
                    return Operation(OperationCode::ThreadGeMask);
                default:
                    UNREACHABLE();
                    return Immediate(0u);
                }
            default:
                UNIMPLEMENTED_MSG("Unhandled system move: {}", instr.sys20.Value());
                return Immediate(0u);
            }
        }();
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::BRA: {
        Node branch;
        if (instr.bra.constant_buffer == 0) {
            // Direct branch: the target is relative to the current pc.
            const u32 target = pc + instr.bra.GetBranchTarget();
            branch = Operation(OperationCode::Branch, Immediate(target));
        } else {
            // Indirect branch: the constant buffer value is shifted right by 3 and added to
            // pc + 1. NOTE(review): the shift presumably converts a byte offset into an
            // instruction index - confirm.
            const u32 target = pc + 1;
            const Node op_a = GetConstBuffer(instr.cbuf36.index, instr.cbuf36.GetOffset());
            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
                                                 PRECISE, op_a, Immediate(3));
            const Node operand =
                Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
            branch = Operation(OperationCode::BranchIndirect, operand);
        }
        // Any condition code other than T makes the branch conditional.
        const Tegra::Shader::ConditionCode cc = instr.flow_condition_code;
        if (cc != Tegra::Shader::ConditionCode::T) {
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::BRX: {
        // BRX is always emitted as an indirect branch; only the source of the offset differs.
        Node operand;
        if (instr.brx.constant_buffer != 0) {
            // Offset read from a constant buffer indexed by gpr8.
            const s32 target = pc + 1;
            const Node index = GetRegister(instr.gpr8);
            const Node op_a =
                GetConstBufferIndirect(instr.cbuf36.index, instr.cbuf36.GetOffset() + 0, index);
            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
                                                 PRECISE, op_a, Immediate(3));
            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
        } else {
            // Offset read directly from gpr8.
            const s32 target = pc + instr.brx.GetBranchExtend();
            const Node op_a = GetRegister(instr.gpr8);
            const Node convert = SignedOperation(OperationCode::IArithmeticShiftRight, true,
                                                 PRECISE, op_a, Immediate(3));
            operand = Operation(OperationCode::IAdd, PRECISE, convert, Immediate(target));
        }
        const Node branch = Operation(OperationCode::BranchIndirect, operand);
        const ConditionCode cc = instr.flow_condition_code;
        if (cc != ConditionCode::T) {
            bb.push_back(Conditional(GetConditionCode(cc), {branch}));
        } else {
            bb.push_back(branch);
        }
        break;
    }
    case OpCode::Id::SSY: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer flow is not supported");
        // Nothing is emitted when the flow stack is disabled.
        if (disable_flow_stack) {
            break;
        }
        // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(
            Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target)));
        break;
    }
    case OpCode::Id::PBK: {
        UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0,
                             "Constant buffer PBK is not supported");
        // Nothing is emitted when the flow stack is disabled.
        if (disable_flow_stack) {
            break;
        }
        // PBK pushes to a stack the address where BRK will jump to.
        const u32 target = pc + instr.bra.GetBranchTarget();
        bb.push_back(
            Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target)));
        break;
    }
    case OpCode::Id::SYNC: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "SYNC condition code used: {}", cc);
        // No flow stack pop is emitted when the `decompiled` flag is set.
        if (decompiled) {
            break;
        }
        // The SYNC opcode jumps to the address previously set by the SSY opcode
        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy));
        break;
    }
    case OpCode::Id::BRK: {
        const ConditionCode cc = instr.flow_condition_code;
        UNIMPLEMENTED_IF_MSG(cc != ConditionCode::T, "BRK condition code used: {}", cc);
        // No flow stack pop is emitted when the `decompiled` flag is set.
        if (decompiled) {
            break;
        }
        // The BRK opcode jumps to the address previously set by the PBK opcode
        bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk));
        break;
    }
    case OpCode::Id::IPA: {
        // A physical (register-addressed) read is used when the idx flag is set and gpr8 is not
        // register 0xff; otherwise the attribute encoded in the instruction is read.
        const bool is_physical = instr.ipa.idx && instr.gpr8.Value() != 0xff;
        const auto attribute = instr.attribute.fmt28;
        const Index index = attribute.index;
        Node value = is_physical ? GetPhysicalInputAttribute(instr.gpr8)
                                 : GetInputAttribute(index, attribute.element);
        // Code taken from Ryujinx.
        // Generic attributes flagged as perspective in the header are multiplied by the fourth
        // component of the position attribute.
        if (index >= Index::Attribute_0 && index <= Index::Attribute_31) {
            const u32 location = static_cast<u32>(index) - static_cast<u32>(Index::Attribute_0);
            if (header.ps.GetPixelImap(location) == PixelImap::Perspective) {
                Node position_w = GetInputAttribute(Index::Position, 3);
                value = Operation(OperationCode::FMul, move(value), move(position_w));
            }
        }
        if (instr.ipa.interp_mode == IpaInterpMode::Multiply) {
            value = Operation(OperationCode::FMul, move(value), GetRegister(instr.gpr20));
        }
        value = GetSaturatedFloat(move(value), instr.ipa.saturate);
        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    case OpCode::Id::OUT_R: {
        UNIMPLEMENTED_IF_MSG(instr.gpr20.Value() != Register::ZeroIndex,
                             "Stream buffer is not supported");
        if (instr.out.emit) {
            // gpr0 is used to store the next address and gpr8 contains the address to emit.
            // Hardware uses pointers here but we just ignore it
            bb.push_back(Operation(OperationCode::EmitVertex));
            SetRegister(bb, instr.gpr0, Immediate(0));
        }
        if (instr.out.cut) {
            bb.push_back(Operation(OperationCode::EndPrimitive));
        }
        break;
    }
    case OpCode::Id::ISBERD: {
        UNIMPLEMENTED_IF(instr.isberd.o != 0);
        UNIMPLEMENTED_IF(instr.isberd.skew != 0);
        UNIMPLEMENTED_IF(instr.isberd.shift != Tegra::Shader::IsberdShift::None);
        UNIMPLEMENTED_IF(instr.isberd.mode != Tegra::Shader::IsberdMode::None);
        // Incomplete implementation: gpr8 is simply copied into gpr0.
        LOG_WARNING(HW_GPU, "ISBERD instruction is incomplete");
        SetRegister(bb, instr.gpr0, GetRegister(instr.gpr8));
        break;
    }
    case OpCode::Id::BAR: {
        // Only one exact instruction encoding is supported; anything else is reported.
        UNIMPLEMENTED_IF_MSG(instr.value != 0xF0A81B8000070000ULL, "BAR is not BAR.SYNC 0x0");
        bb.push_back(Operation(OperationCode::Barrier));
        break;
    }
    case OpCode::Id::MEMBAR: {
        UNIMPLEMENTED_IF(instr.membar.unknown != Tegra::Shader::MembarUnknown::Default);
        // Map the barrier scope to an IR operation; unknown scopes fall back to a global barrier.
        const OperationCode type = [instr] {
            switch (instr.membar.type) {
            case Tegra::Shader::MembarType::CTA:
                return OperationCode::MemoryBarrierGroup;
            case Tegra::Shader::MembarType::GL:
                return OperationCode::MemoryBarrierGlobal;
            default:
                UNIMPLEMENTED_MSG("MEMBAR type={}", instr.membar.type.Value());
                return OperationCode::MemoryBarrierGlobal;
            }
        }();
        bb.push_back(Operation(type));
        break;
    }
    case OpCode::Id::DEPBAR: {
        // Stub: no IR is emitted for this instruction.
        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/predicate_set_predicate.cpp
+++ b/src/video_core/shader/decode/predicate_set_predicate.cpp
@ -1,68 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 // Decodes PSETP (combine predicates into predicates) and CSETP (combine a condition code with
 // a predicate into predicates), appending the predicate assignments to `bb`. Returns `pc`.
 u32 ShaderIR::DecodePredicateSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opcode_id = opcode->get().GetId();
    // Index of the constant predicate, which is never written as a destination.
    const u64 unused_index = static_cast<u64>(Pred::UnusedIndex);

    if (opcode_id == OpCode::Id::PSETP) {
        const Node lhs = GetPredicate(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
        const Node rhs = GetPredicate(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
        // We can't use the constant predicate as destination.
        ASSERT(instr.psetp.pred3 != unused_index);
        const Node extra = GetPredicate(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
        // NOTE(review): both combine stages use psetp.op here, whereas
        // DecodePredicateSetRegister uses pset.cond for the first stage - confirm intended.
        const OperationCode combiner = GetPredicateCombiner(instr.psetp.op);
        const Node first_stage = Operation(combiner, lhs, rhs);
        // Primary destination: (lhs OP rhs) OP extra
        SetPredicate(bb, instr.psetp.pred3, Operation(combiner, first_stage, extra));
        if (instr.psetp.pred0 != unused_index) {
            // Secondary destination, when enabled: !(lhs OP rhs) OP extra
            const Node negated = Operation(OperationCode::LogicalNegate, first_stage);
            SetPredicate(bb, instr.psetp.pred0, Operation(combiner, negated, extra));
        }
        return pc;
    }

    if (opcode_id == OpCode::Id::CSETP) {
        const Node extra = GetPredicate(instr.csetp.pred39, instr.csetp.neg_pred39 != 0);
        const Node flag = GetConditionCode(instr.csetp.cc);
        const OperationCode combiner = GetPredicateCombiner(instr.csetp.op);
        // Either destination may be disabled independently.
        if (instr.csetp.pred3 != unused_index) {
            SetPredicate(bb, instr.csetp.pred3, Operation(combiner, flag, extra));
        }
        if (instr.csetp.pred0 != unused_index) {
            const Node negated_flag = Operation(OperationCode::LogicalNegate, flag);
            SetPredicate(bb, instr.csetp.pred0, Operation(combiner, negated_flag, extra));
        }
        return pc;
    }

    UNIMPLEMENTED_MSG("Unhandled predicate instruction: {}", opcode->get().GetName());
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/predicate_set_register.cpp
+++ b/src/video_core/shader/decode/predicate_set_register.cpp
@ -1,46 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 // Decodes PSET: combines three predicates into a single boolean and writes it to gpr0, either
 // as a float (1.0f / 0.0f) or as an integer mask (0xffffffff / 0). Returns `pc`.
 u32 ShaderIR::DecodePredicateSetRegister(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in PSET is not implemented");

    // First stage: pred12 COND pred29
    const Node lhs = GetPredicate(instr.pset.pred12, instr.pset.neg_pred12 != 0);
    const Node rhs = GetPredicate(instr.pset.pred29, instr.pset.neg_pred29 != 0);
    const Node first_stage = Operation(GetPredicateCombiner(instr.pset.cond), lhs, rhs);

    // Second stage: (first stage) OP pred39
    const Node extra = GetPredicate(instr.pset.pred39, instr.pset.neg_pred39 != 0);
    const OperationCode combiner = GetPredicateCombiner(instr.pset.op);
    const Node condition = Operation(combiner, first_stage, extra);

    // The bf flag selects the float encoding of the result over the integer one.
    const bool as_float = instr.pset.bf != 0;
    Node on_true;
    Node on_false;
    if (as_float) {
        on_true = Immediate(1.0f);
        on_false = Immediate(0.0f);
    } else {
        on_true = Immediate(0xffffffff);
        on_false = Immediate(0);
    }
    const Node result = Operation(OperationCode::Select, PRECISE, condition, on_true, on_false);

    if (as_float) {
        SetInternalFlagsFromFloat(bb, result, instr.generates_cc);
    } else {
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
    }
    SetRegister(bb, instr.gpr0, result);
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/register_set_predicate.cpp
+++ b/src/video_core/shader/decode/register_set_predicate.cpp
@ -1,86 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <utility>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 namespace {
 // Number of internal flags visited by DecodeRegisterSetPredicate in condition code (Cc) mode.
 constexpr u64 NUM_CONDITION_CODES = 4;
 // Number of predicates visited by DecodeRegisterSetPredicate in every other mode.
 constexpr u64 NUM_PREDICATES = 7;
 } // namespace
 // Decodes the immediate-mask forms of R2P and P2R.
 //  - R2P_IMM: for every entry enabled in the immediate mask, assigns the entry (a predicate, or
 //    an internal flag in Cc mode) from the matching bit of the selected byte of gpr8.
 //  - P2R_IMM: packs the entries into bits, masks them with the immediate mask and inserts the
 //    resulting 8 bits into the selected byte of gpr8; the result is written to gpr0.
 // Returns `pc`.
 u32 ShaderIR::DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opcode_id = opcode->get().GetId();

    // Mask selecting which entries take part in the transfer.
    Node apply_mask = [opcode_id, instr] {
        switch (opcode_id) {
        case OpCode::Id::R2P_IMM:
        case OpCode::Id::P2R_IMM:
            return Immediate(static_cast<u32>(instr.p2r_r2p.immediate_mask));
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();

    // Bit offset of the register byte that is read (R2P) or written (P2R).
    const u32 offset = static_cast<u32>(instr.p2r_r2p.byte) * 8;
    const bool cc = instr.p2r_r2p.mode == Tegra::Shader::R2pMode::Cc;
    const u64 num_entries = cc ? NUM_CONDITION_CODES : NUM_PREDICATES;
    const auto get_entry = [this, cc](u64 entry) {
        return cc ? GetInternalFlag(static_cast<InternalFlag>(entry)) : GetPredicate(entry);
    };

    switch (opcode_id) {
    case OpCode::Id::R2P_IMM: {
        Node mask = GetRegister(instr.gpr8);
        for (u64 entry = 0; entry < num_entries; ++entry) {
            const u32 shift = static_cast<u32>(entry);
            // Only assign the entry when its bit is set in the immediate mask.
            Node apply = BitfieldExtract(apply_mask, shift, 1);
            Node condition = Operation(OperationCode::LogicalUNotEqual, apply, Immediate(0));
            Node compare = BitfieldExtract(mask, offset + shift, 1);
            Node value = Operation(OperationCode::LogicalUNotEqual, move(compare), Immediate(0));
            Node code = Operation(OperationCode::LogicalAssign, get_entry(entry), move(value));
            bb.push_back(Conditional(move(condition), {move(code)}));
        }
        break;
    }
    case OpCode::Id::P2R_IMM: {
        Node value = Immediate(0);
        for (u64 entry = 0; entry < num_entries; ++entry) {
            // Each entry contributes the bit at its own index when it is true.
            Node bit = Operation(OperationCode::Select, get_entry(entry), Immediate(1U << entry),
                                 Immediate(0));
            value = Operation(OperationCode::UBitwiseOr, move(value), move(bit));
        }
        value = Operation(OperationCode::UBitwiseAnd, move(value), apply_mask);
        value = BitfieldInsert(GetRegister(instr.gpr8), move(value), offset, 8);
        SetRegister(bb, instr.gpr0, move(value));
        break;
    }
    default:
        // The instructions handled here are P2R and R2P; the message used to name "R2R".
        UNIMPLEMENTED_MSG("Unhandled P2R/R2P instruction: {}", opcode->get().GetName());
        break;
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@ -1,153 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::ShfType;
 using Tegra::Shader::ShfXmode;
 namespace {
 // Returns a boolean node that is true when `shift` equals 32.
 Node IsFull(Node shift) {
    Node full_width = Immediate(32);
    return Operation(OperationCode::LogicalIEqual, move(shift), move(full_width));
 }
 // Applies the shift operation `opcode` to `value`, yielding the constant 0 instead when the
 // shift amount equals 32.
 Node Shift(OperationCode opcode, Node value, Node shift) {
    Node is_full = IsFull(shift);
    Node shifted = Operation(opcode, move(value), move(shift));
    return Operation(OperationCode::Select, move(is_full), Immediate(0), move(shifted));
 }
 // Clamps a shift amount to the range [0, size] using signed integer max/min.
 Node ClampShift(Node shift, s32 size = 32) {
    Node non_negative = Operation(OperationCode::IMax, move(shift), Immediate(0));
    return Operation(OperationCode::IMin, move(non_negative), Immediate(size));
 }
 // Wraps a shift amount by masking it with `size - 1`.
 Node WrapShift(Node shift, s32 size = 32) {
    const s32 wrap_mask = size - 1;
    return Operation(OperationCode::UBitwiseAnd, move(shift), Immediate(wrap_mask));
 }
 // Builds a funnel shift right: the low 32 bits of the 64-bit value (high:low) shifted right by
 // `shift`. `low_shift` is expected to hold `32 - shift`, as computed by DecodeShift.
 // For ShfType::Bits32 the shift amount is assumed to lie in [0, 32]; otherwise shifts of 32 or
 // more are served from `high`, arithmetically for ShfType::S64 and logically for other types.
 Node ShiftRight(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // These values are used when the shift value is less than 32
    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, shift);
    Node less_high = Shift(OperationCode::ILogicalShiftLeft, high, low_shift);
    Node less = Operation(OperationCode::IBitwiseOr, move(less_high), move(less_low));
    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(high), move(less));
    }
    // And these when it's larger than or equal to 32
    const bool is_signed = type == ShfType::S64;
    const auto opcode = SignedToUnsignedCode(OperationCode::IArithmeticShiftRight, is_signed);
    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node greater = Shift(opcode, move(high), move(reduced));
    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
    // A right funnel shift by zero leaves the low word untouched. This used to select `high`,
    // which is the zero-shift result of a *left* funnel shift.
    return Operation(OperationCode::Select, move(is_zero), move(low), move(value));
 }
 // Builds a funnel shift left: the high 32 bits of the 64-bit value (high:low) shifted left by
 // `shift`. `low_shift` is expected to hold `32 - shift`, as computed by DecodeShift.
 // For ShfType::Bits32 the shift amount is assumed to lie in [0, 32]; otherwise shifts of 32 or
 // more are served from `low`.
 Node ShiftLeft(Node low, Node high, Node shift, Node low_shift, ShfType type) {
    // These values are used when the shift value is less than 32.
    // The contribution of `low` goes through the guarded Shift helper: with a zero shift
    // `low_shift` is 32, and an unguarded shift by the full register width is undefined in the
    // target shading languages. The guard makes it contribute 0, so the result is `high`.
    Node less_low = Shift(OperationCode::ILogicalShiftRight, low, move(low_shift));
    Node less_high = Operation(OperationCode::ILogicalShiftLeft, high, shift);
    Node less = Operation(OperationCode::IBitwiseOr, move(less_low), move(less_high));
    if (type == ShfType::Bits32) {
        // On 32 bit shifts we are either full (shifting 32) or shifting less than 32 bits
        return Operation(OperationCode::Select, IsFull(move(shift)), move(low), move(less));
    }
    // And these when it's larger than or equal to 32
    Node reduced = Operation(OperationCode::IAdd, shift, Immediate(-32));
    Node greater = Shift(OperationCode::ILogicalShiftLeft, move(low), move(reduced));
    Node is_less = Operation(OperationCode::LogicalILessThan, shift, Immediate(32));
    Node is_zero = Operation(OperationCode::LogicalIEqual, move(shift), Immediate(0));
    Node value = Operation(OperationCode::Select, move(is_less), move(less), move(greater));
    // A left funnel shift by zero leaves the high word untouched.
    return Operation(OperationCode::Select, move(is_zero), move(high), move(value));
 }
 } // Anonymous namespace
 // Decodes SHR, SHL and SHF (funnel shift) instructions, writing the shifted value to gpr0.
 // Operand A is always gpr8; operand B is an immediate, a register or a constant buffer entry
 // depending on the instruction encoding. Returns `pc`.
 u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    const auto opid = opcode->get().GetId();

    Node value_a = GetRegister(instr.gpr8);
    Node amount;
    if (instr.is_b_imm) {
        amount = Immediate(instr.alu.GetSignedImm20_20());
    } else if (instr.is_b_gpr) {
        amount = GetRegister(instr.gpr20);
    } else {
        amount = GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset());
    }

    switch (opid) {
    case OpCode::Id::SHR_C:
    case OpCode::Id::SHR_R:
    case OpCode::Id::SHR_IMM: {
        // The shift amount is either wrapped or clamped before the right shift is emitted.
        Node shift = instr.shr.wrap ? WrapShift(move(amount)) : ClampShift(move(amount));
        Node result = SignedOperation(OperationCode::IArithmeticShiftRight, instr.shift.is_signed,
                                      move(value_a), move(shift));
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    case OpCode::Id::SHL_C:
    case OpCode::Id::SHL_R:
    case OpCode::Id::SHL_IMM: {
        Node result = Operation(OperationCode::ILogicalShiftLeft, value_a, amount);
        SetInternalFlagsFromInteger(bb, result, instr.generates_cc);
        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    case OpCode::Id::SHF_RIGHT_R:
    case OpCode::Id::SHF_RIGHT_IMM:
    case OpCode::Id::SHF_LEFT_R:
    case OpCode::Id::SHF_LEFT_IMM: {
        UNIMPLEMENTED_IF(instr.generates_cc);
        UNIMPLEMENTED_IF_MSG(instr.shf.xmode != ShfXmode::None, "xmode={}",
                             instr.shf.xmode.Value());
        // Funnel shifts carry their own immediate field, which replaces the generic one.
        if (instr.is_b_imm) {
            amount = Immediate(static_cast<u32>(instr.shf.immediate));
        }
        const bool is_32bit = instr.shf.type == ShfType::Bits32;
        const s32 size = is_32bit ? 32 : 64;
        Node shift = instr.shf.wrap ? WrapShift(move(amount), size) : ClampShift(move(amount), size);
        // low_shift = 32 - shift
        Node low_shift = Operation(OperationCode::IAdd, Operation(OperationCode::INegate, shift),
                                   Immediate(32));
        const bool is_right = opid == OpCode::Id::SHF_RIGHT_R || opid == OpCode::Id::SHF_RIGHT_IMM;
        // gpr8 supplies the low word and gpr39 the high word of the value being shifted.
        Node high = GetRegister(instr.gpr39);
        Node result = is_right ? ShiftRight(move(value_a), move(high), move(shift),
                                            move(low_shift), instr.shf.type)
                               : ShiftLeft(move(value_a), move(high), move(shift),
                                           move(low_shift), instr.shf.type);
        SetRegister(bb, instr.gpr0, move(result));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@ -1,935 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <vector>
 #include <fmt/format.h>
 #include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 using Tegra::Shader::TextureMiscMode;
 using Tegra::Shader::TextureProcessMode;
 using Tegra::Shader::TextureType;
 // Returns the number of coordinate components used to address a texture of the given type.
 // Types that are not handled are reported as unimplemented and yield 0.
 static std::size_t GetCoordCount(TextureType texture_type) {
    if (texture_type == TextureType::Texture1D) {
        return 1;
    }
    if (texture_type == TextureType::Texture2D) {
        return 2;
    }
    if (texture_type == TextureType::Texture3D || texture_type == TextureType::TextureCube) {
        return 3;
    }
    UNIMPLEMENTED_MSG("Unhandled texture type: {}", texture_type);
    return 0;
 }
 // Decodes the texture instruction stored at program_code[pc] and appends the IR generated for it
 // to the block bb.
 //
 // Each opcode either delegates to one of the Get*Code helpers and writes the four resulting
 // components with a Write*Instruction* helper, or builds the operation nodes inline. Unsupported
 // modes are reported through UNIMPLEMENTED_* and decoding continues.
 //
 // Returns pc unchanged.
 u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // Set by the *_B cases right before they fall through into their bound counterpart
    bool is_bindless = false;
    switch (opcode->get().GetId()) {
    case OpCode::Id::TEX: {
        const TextureType texture_type{instr.tex.texture_type};
        const bool is_array = instr.tex.array != 0;
        const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex.GetTextureProcessMode();
        // An empty bindless register selects the bound sampler path
        WriteTexInstructionFloat(
            bb, instr,
            GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
        break;
    }
    case OpCode::Id::TEX_B: {
        // Every field of this instruction is read through the tex_b view, including AOFFI.
        // Reading the flag through instr.tex (as was done before) mixes two encodings.
        // NOTE(review): assumes tex_b exposes AOFFI through UsesMiscMode like it does DC - confirm
        // against the instruction definition.
        UNIMPLEMENTED_IF_MSG(instr.tex_b.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        const TextureType texture_type{instr.tex_b.texture_type};
        const bool is_array = instr.tex_b.array != 0;
        const bool is_aoffi = instr.tex_b.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.tex_b.GetTextureProcessMode();
        // gpr20 is handed over as the register used to resolve the bindless sampler
        WriteTexInstructionFloat(bb, instr,
                                 GetTexCode(instr, texture_type, process_mode, depth_compare,
                                            is_array, is_aoffi, {instr.gpr20}));
        break;
    }
    case OpCode::Id::TEXS: {
        const TextureType texture_type{instr.texs.GetTextureType()};
        const bool is_array{instr.texs.IsArrayTexture()};
        const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
        const auto process_mode = instr.texs.GetTextureProcessMode();
        const Node4 components =
            GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
        // The destination is written either as 32-bit floats or as packed half floats
        if (instr.texs.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    case OpCode::Id::TLD4_B: {
        is_bindless = true;
        [[fallthrough]];
    }
    case OpCode::Id::TLD4: {
        UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
                             "NDV is not implemented");
        const auto texture_type = instr.tld4.texture_type.Value();
        // The misc modes are read through the view matching the decoded opcode
        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
        const bool is_array = instr.tld4.array != 0;
        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool is_ptp = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::PTP)
                                        : instr.tld4.UsesMiscMode(TextureMiscMode::PTP);
        WriteTexInstructionFloat(bb, instr,
                                 GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi,
                                             is_ptp, is_bindless));
        break;
    }
    case OpCode::Id::TLD4S: {
        constexpr std::size_t num_coords = 2;
        const bool is_aoffi = instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI);
        const bool is_depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
        const Node op_a = GetRegister(instr.gpr8);
        const Node op_b = GetRegister(instr.gpr20);
        // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
        std::vector<Node> coords;
        std::vector<Node> aoffi;
        Node depth_compare;
        if (is_depth_compare) {
            // Note: TLD4S coordinate encoding works just like TEXS's
            const Node op_y = GetRegister(instr.gpr8.Value() + 1);
            coords.push_back(op_a);
            coords.push_back(op_y);
            if (is_aoffi) {
                // gpr20 carries the offsets, so the reference value moves to gpr20 + 1
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
                depth_compare = GetRegister(instr.gpr20.Value() + 1);
            } else {
                depth_compare = op_b;
            }
        } else {
            // There's no depth compare
            coords.push_back(op_a);
            if (is_aoffi) {
                // gpr20 carries the offsets, so the second coordinate comes from gpr8 + 1
                coords.push_back(GetRegister(instr.gpr8.Value() + 1));
                aoffi = GetAoffiCoordinates(op_b, num_coords, true);
            } else {
                coords.push_back(op_b);
            }
        }
        const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
        SamplerInfo info;
        info.is_shadow = is_depth_compare;
        const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
        // One gather operation is emitted per destination element
        Node4 values;
        for (u32 element = 0; element < values.size(); ++element) {
            MetaTexture meta{*sampler, {}, depth_compare, aoffi,   {}, {},
                             {},       {}, component,     element, {}};
            values[element] = Operation(OperationCode::TextureGather, meta, coords);
        }
        // All four components are written: the component mask is ignored here
        if (instr.tld4s.fp16_flag) {
            WriteTexsInstructionHalfFloat(bb, instr, values, true);
        } else {
            WriteTexsInstructionFloat(bb, instr, values, true);
        }
        break;
    }
    case OpCode::Id::TXD_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TXD: {
        UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        const bool is_array = instr.txd.is_array != 0;
        const auto derivate_reg = instr.gpr20.Value();
        const auto texture_type = instr.txd.texture_type.Value();
        const auto coord_count = GetCoordCount(texture_type);
        u64 base_reg = instr.gpr8.Value();
        Node index_var;
        SamplerInfo info;
        info.type = texture_type;
        info.is_array = is_array;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(base_reg, info, index_var)
                        : GetSampler(instr.sampler, info);
        Node4 values;
        if (!sampler) {
            // The sampler could not be resolved: write zeros instead of sampling
            std::generate(values.begin(), values.end(), [this] { return Immediate(0); });
            WriteTexInstructionFloat(bb, instr, values);
            break;
        }
        if (is_bindless) {
            // The first register was consumed by the sampler lookup above
            base_reg++;
        }
        std::vector<Node> coords;
        std::vector<Node> derivates;
        for (std::size_t i = 0; i < coord_count; ++i) {
            coords.push_back(GetRegister(base_reg + i));
            // Two derivative registers are read per coordinate, starting at gpr20
            const std::size_t derivate = i * 2;
            derivates.push_back(GetRegister(derivate_reg + derivate));
            derivates.push_back(GetRegister(derivate_reg + derivate + 1));
        }
        Node array_node = {};
        if (is_array) {
            // The array index is the low 16 bits of the register following the coordinates
            const Node info_reg = GetRegister(base_reg + coord_count);
            array_node = BitfieldExtract(info_reg, 0, 16);
        }
        for (u32 element = 0; element < values.size(); ++element) {
            MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates,
                             {},       {},         {}, element, index_var};
            values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
        }
        WriteTexInstructionFloat(bb, instr, values);
        break;
    }
    case OpCode::Id::TXQ_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TXQ: {
        Node index_var;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(instr.gpr8, {}, index_var)
                        : GetSampler(instr.sampler, {});
        if (!sampler) {
            // The sampler could not be resolved: every enabled component reads as zero
            u32 indexer = 0;
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
                const Node value = Immediate(0);
                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        u32 indexer = 0;
        switch (instr.txq.query_type) {
        case Tegra::Shader::TextureQueryType::Dimension: {
            for (u32 element = 0; element < 4; ++element) {
                if (!instr.txq.IsComponentEnabled(element)) {
                    continue;
                }
                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
                // When bindless, gpr8 was used for the sampler lookup, so the query operand is
                // read from the next register
                const Node value =
                    Operation(OperationCode::TextureQueryDimensions, meta,
                              GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
                SetTemporary(bb, indexer++, value);
            }
            // Results go through temporaries first and are then moved into consecutive registers
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        default:
            UNIMPLEMENTED_MSG("Unhandled texture query type: {}", instr.txq.query_type.Value());
        }
        break;
    }
    case OpCode::Id::TMML_B:
        is_bindless = true;
        [[fallthrough]];
    case OpCode::Id::TMML: {
        UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                             "NDV is not implemented");
        const auto texture_type = instr.tmml.texture_type.Value();
        const bool is_array = instr.tmml.array != 0;
        SamplerInfo info;
        info.type = texture_type;
        info.is_array = is_array;
        Node index_var;
        const std::optional<SamplerEntry> sampler =
            is_bindless ? GetBindlessSampler(instr.gpr20, info, index_var)
                        : GetSampler(instr.sampler, info);
        if (!sampler) {
            // The sampler could not be resolved: every enabled component reads as zero
            u32 indexer = 0;
            for (u32 element = 0; element < 2; ++element) {
                if (!instr.tmml.IsComponentEnabled(element)) {
                    continue;
                }
                const Node value = Immediate(0);
                SetTemporary(bb, indexer++, value);
            }
            for (u32 i = 0; i < indexer; ++i) {
                SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
            }
            break;
        }
        // For array textures the coordinates start one register after gpr8
        const u64 base_index = is_array ? 1 : 0;
        const u64 num_components = [texture_type] {
            switch (texture_type) {
            case TextureType::Texture1D:
                return 1;
            case TextureType::Texture2D:
                return 2;
            case TextureType::TextureCube:
                return 3;
            default:
                UNIMPLEMENTED_MSG("Unhandled texture type {}", texture_type);
                return 2;
            }
        }();
        // TODO: What's the array component used for?
        std::vector<Node> coords;
        coords.reserve(num_components);
        for (u64 component = 0; component < num_components; ++component) {
            coords.push_back(GetRegister(instr.gpr8.Value() + base_index + component));
        }
        // Only the first two components are considered for this query
        u32 indexer = 0;
        for (u32 element = 0; element < 2; ++element) {
            if (!instr.tmml.IsComponentEnabled(element)) {
                continue;
            }
            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
            Node value = Operation(OperationCode::TextureQueryLod, meta, coords);
            SetTemporary(bb, indexer++, std::move(value));
        }
        for (u32 i = 0; i < indexer; ++i) {
            SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i));
        }
        break;
    }
    case OpCode::Id::TLD: {
        UNIMPLEMENTED_IF_MSG(instr.tld.aoffi, "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
        WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
        break;
    }
    case OpCode::Id::TLDS: {
        const TextureType texture_type{instr.tlds.GetTextureType()};
        const bool is_array{instr.tlds.IsArrayTexture()};
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                             "AOFFI is not implemented");
        UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
        const Node4 components = GetTldsCode(instr, texture_type, is_array);
        if (instr.tlds.fp32_flag) {
            WriteTexsInstructionFloat(bb, instr, components);
        } else {
            WriteTexsInstructionHalfFloat(bb, instr, components);
        }
        break;
    }
    default:
        // This decoder only handles texture opcodes, so report the failure as such
        UNIMPLEMENTED_MSG("Unhandled texture instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 // Fills in the fields of info that have not been set yet and returns the completed copy.
 // Fields that already hold a value are never overwritten. Missing fields are taken from the
 // sampler descriptor when one is supplied; otherwise a warning is logged and the texture is
 // assumed to be a plain 2D texture (no array, no shadow, no buffer).
 ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
    SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
    if (info.IsComplete()) {
        // Nothing left to deduce
        return info;
    }
    if (sampler.has_value()) {
        info.type = info.type.value_or(sampler->texture_type);
        info.is_array = info.is_array.value_or(sampler->is_array != 0);
        info.is_shadow = info.is_shadow.value_or(sampler->is_shadow != 0);
        info.is_buffer = info.is_buffer.value_or(sampler->is_buffer != 0);
        return info;
    }
    // No descriptor available: fall back to fixed defaults
    LOG_WARNING(HW_GPU, "Unknown sampler info");
    info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
    info.is_array = info.is_array.value_or(false);
    info.is_shadow = info.is_shadow.value_or(false);
    info.is_buffer = info.is_buffer.value_or(false);
    return info;
 }
 // Returns the entry describing the bound sampler referenced by the instruction's sampler field.
 // The sampler information is completed with what the registry reports for that offset. An entry
 // already registered for the same offset is reused (and asserted to match); otherwise a new
 // entry is appended to used_samplers.
 std::optional<SamplerEntry> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
                                                  SamplerInfo sampler_info) {
    const u32 offset = static_cast<u32>(sampler.index.Value());
    const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));

    // Reuse the mapping if this offset has been seen before
    for (const SamplerEntry& entry : used_samplers) {
        if (entry.offset != offset) {
            continue;
        }
        ASSERT(!entry.is_bindless && entry.type == info.type && entry.is_array == info.is_array &&
               entry.is_shadow == info.is_shadow && entry.is_buffer == info.is_buffer);
        return entry;
    }

    // First use of this offset: register it under the next free index
    const auto next_index = static_cast<u32>(used_samplers.size());
    return used_samplers.emplace_back(next_index, offset, *info.type, *info.is_array,
                                      *info.is_shadow, *info.is_buffer, false);
 }
 // Resolves the sampler referenced through register reg.
 //
 // The register is tracked back through global_code with TrackBindlessSampler. Depending on the
 // kind of node the tracking produces, the sampler is handled as a bindless sampler, a separate
 // sampler or an array (indexed) sampler. In every case the sampler information is completed
 // with what the registry reports, an already registered entry is reused when one matches, and
 // otherwise a new entry is appended to used_samplers.
 //
 // index_var is only written on the array sampler path.
 // Returns std::nullopt when the tracking fails or yields none of the handled node kinds.
 std::optional<SamplerEntry> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                                         SamplerInfo info, Node& index_var) {
    const Node sampler_register = GetRegister(reg);
    const auto [base_node, tracked_sampler_info] =
        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
    if (!base_node) {
        // Tracking is expected to always succeed; callers handle the empty result nonetheless
        UNREACHABLE();
        return std::nullopt;
    }
    // Bindless sampler: identified by a (buffer, offset) pair
    if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
        const u32 buffer = sampler_info->index;
        const u32 offset = sampler_info->offset;
        info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
                                     [buffer, offset](const SamplerEntry& entry) {
                                         return entry.buffer == buffer && entry.offset == offset;
                                     });
        if (it != used_samplers.end()) {
            // NOTE(review): unlike the other paths, is_buffer is not part of this check
            ASSERT(it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow);
            return *it;
        }
        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, false);
    }
    // Separate sampler: identified by a pair of buffer indices and a pair of offsets
    if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
        const std::pair indices = sampler_info->indices;
        const std::pair offsets = sampler_info->offsets;
        info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
        // Try to use an already created sampler if it exists
        const auto it =
            std::find_if(used_samplers.begin(), used_samplers.end(),
                         [indices, offsets](const SamplerEntry& entry) {
                             return offsets == std::pair{entry.offset, entry.secondary_offset} &&
                                    indices == std::pair{entry.buffer, entry.secondary_buffer};
                         });
        if (it != used_samplers.end()) {
            ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
            return *it;
        }
        // Otherwise create a new mapping for this sampler
        const u32 next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer);
    }
    // Array sampler: a bound sampler selected at runtime through a custom variable
    if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
        // The tracked base offset is divided by 4 before it is used to query the registry
        // (presumably a byte offset to word offset conversion - TODO confirm)
        const u32 base_offset = sampler_info->base_offset / 4;
        index_var = GetCustomVariable(sampler_info->bindless_var);
        info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
        // If this sampler has already been used, return the existing mapping.
        const auto it = std::find_if(
            used_samplers.begin(), used_samplers.end(),
            [base_offset](const SamplerEntry& entry) { return entry.offset == base_offset; });
        if (it != used_samplers.end()) {
            ASSERT(!it->is_bindless && it->type == info.type && it->is_array == info.is_array &&
                   it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer &&
                   it->is_indexed);
            return *it;
        }
        // Remember that this shader uses at least one indexed sampler
        uses_indexed_samplers = true;
        // Otherwise create a new mapping for this sampler
        const auto next_index = static_cast<u32>(used_samplers.size());
        return used_samplers.emplace_back(next_index, base_offset, *info.type, *info.is_array,
                                          *info.is_shadow, *info.is_buffer, true);
    }
    // The tracked node is of a kind this function does not handle
    return std::nullopt;
 }
 // Writes the components enabled in the instruction's component mask to consecutive registers
 // starting at gpr0. Disabled components are skipped, so the destination registers are packed.
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
    // Stage the enabled components in temporaries first
    u32 written = 0;
    for (u32 elem = 0; elem < 4; ++elem) {
        if (instr.tex.IsComponentEnabled(elem)) {
            SetTemporary(bb, written, components[elem]);
            ++written;
        }
    }
    // Then move the staged values into the real destination registers
    for (u32 slot = 0; slot < written; ++slot) {
        SetRegister(bb, instr.gpr0.Value() + slot, GetTemporary(slot));
    }
 }
 // Writes the selected components of a TEXS-style instruction as 32-bit values.
 // TEXS has two destination registers and a swizzle: the first two selected components go into
 // gpr0+0 and gpr0+1, the remaining ones into gpr28+0 and gpr28+1. When ignore_mask is set every
 // component is written regardless of the component mask.
 void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
                                          bool ignore_mask) {
    // Stage the selected components in temporaries
    u32 written = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (instr.texs.IsComponentEnabled(component) || ignore_mask) {
            SetTemporary(bb, written, components[component]);
            ++written;
        }
    }
    // Distribute the staged values over the two destination register pairs
    for (u32 i = 0; i < written; ++i) {
        const u32 lane = i % 2;
        if (i >= 2) {
            // The second pair is only valid when the instruction has two destinations
            ASSERT(instr.texs.HasTwoDestinations());
            SetRegister(bb, instr.gpr28.Value() + lane, GetTemporary(i));
            continue;
        }
        SetRegister(bb, instr.gpr0.Value() + lane, GetTemporary(i));
    }
 }
 // Writes the selected components of a TEXS-style instruction as packed half floats.
 // TEXS.F16 destination registers are packed in two registers in pairs (just like any half float
 // instruction): the first pair goes to gpr0 and, when more than two components are selected,
 // the second pair goes to gpr28. When ignore_mask is set the component mask is not consulted.
 void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
                                              const Node4& components, bool ignore_mask) {
    Node4 packed_inputs;
    u32 count = 0;
    for (u32 component = 0; component < 4; ++component) {
        if (instr.texs.IsComponentEnabled(component) || ignore_mask) {
            packed_inputs[count] = components[component];
            ++count;
        }
    }
    if (count == 0) {
        // Nothing is selected, so nothing is written
        return;
    }
    // Unused lanes are padded with zero so that each pack operation has two operands
    for (std::size_t lane = count; lane < packed_inputs.size(); ++lane) {
        packed_inputs[lane] = Immediate(0);
    }
    const Node low_pair = Operation(OperationCode::HPack2, packed_inputs[0], packed_inputs[1]);
    if (count > 2) {
        // Both pairs are staged in temporaries before the destination registers are written
        SetTemporary(bb, 0, low_pair);
        SetTemporary(bb, 1, Operation(OperationCode::HPack2, packed_inputs[2], packed_inputs[3]));
        SetRegister(bb, instr.gpr0, GetTemporary(0));
        SetRegister(bb, instr.gpr28, GetTemporary(1));
        return;
    }
    SetRegister(bb, instr.gpr0, low_pair);
 }
 // Builds the four per-element sampling operations shared by the TEX and TEXS paths.
 //
 // A non-null array node marks the texture as an array, a non-null depth_compare node marks it
 // as a shadow texture, and a present bindless_reg selects the bindless sampler lookup. The lod
 // or bias operand, when the process mode needs one, is read from gpr20 + bias_offset.
 // Returns four zero immediates when the sampler cannot be resolved.
 Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                                TextureProcessMode process_mode, std::vector<Node> coords,
                                Node array, Node depth_compare, u32 bias_offset,
                                std::vector<Node> aoffi,
                                std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool is_array = array != nullptr;
    const bool is_shadow = depth_compare != nullptr;
    const bool is_bindless = bindless_reg.has_value();
    ASSERT_MSG(texture_type != TextureType::Texture3D || !is_array || !is_shadow,
               "Illegal texture type");

    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = is_shadow;
    info.is_buffer = false;

    Node index_var;
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(*bindless_reg, info, index_var)
                    : GetSampler(instr.sampler, info);
    if (!sampler) {
        return {Immediate(0), Immediate(0), Immediate(0), Immediate(0)};
    }

    // Modes carrying a level of detail use the explicit-lod operation
    const bool uses_explicit_lod = process_mode == TextureProcessMode::LZ ||
                                   process_mode == TextureProcessMode::LL ||
                                   process_mode == TextureProcessMode::LLA;
    const OperationCode opcode =
        uses_explicit_lod ? OperationCode::TextureLod : OperationCode::Texture;

    Node bias;
    Node lod;
    if (process_mode == TextureProcessMode::LZ) {
        lod = Immediate(0.0f);
    } else if (process_mode == TextureProcessMode::LB) {
        // If present, lod or bias are always stored in the register indexed by the gpr20 field
        // with an offset depending on the usage of the other registers.
        bias = GetRegister(instr.gpr20.Value() + bias_offset);
    } else if (process_mode == TextureProcessMode::LL) {
        lod = GetRegister(instr.gpr20.Value() + bias_offset);
    } else if (process_mode != TextureProcessMode::None) {
        UNIMPLEMENTED_MSG("Unimplemented process mode={}", process_mode);
    }

    // Emit one operation per destination element; only the element index differs
    Node4 result;
    for (u32 element = 0; element < result.size(); ++element) {
        MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias,
                         lod,      {},    element,       index_var};
        result[element] = Operation(opcode, meta, coords);
    }
    return result;
 }
 // Gathers the operands of a TEX / TEX_B instruction and forwards them to GetTextureCode.
 //
 // Coordinates are read starting at gpr8 (preceded by the array index for array textures). The
 // optional operands (offsets and depth reference) are read from consecutive registers
 // starting at gpr20, after the slots skipped for a bindless sampler and for lod/bias.
 Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
                            TextureProcessMode process_mode, bool depth_compare, bool is_array,
                            bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
    const bool is_bindless = bindless_reg.has_value();
    const bool lod_bias_enabled =
        !(process_mode == TextureProcessMode::None || process_mode == TextureProcessMode::LZ);

    // Cursor over the gpr20 operands, skipping the bindless and lod/bias slots when in use
    u64 parameter_register = instr.gpr20.Value();
    parameter_register += is_bindless ? 1 : 0;
    parameter_register += lod_bias_enabled ? 1 : 0;
    // Lod/bias itself sits right after the bindless slot, if there is one
    const u32 bias_lod_offset = is_bindless ? 1 : 0;

    const auto coord_count = std::get<0>(ValidateAndGetCoordinateElement(
        texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5));

    // If enabled arrays index is always stored in the gpr8 field, and the first coordinate
    // follows it; otherwise the first coordinate is gpr8 itself
    const u64 array_register = instr.gpr8.Value();
    const u64 coord_register = array_register + (is_array ? 1 : 0);

    std::vector<Node> coords;
    for (std::size_t index = 0; index < coord_count; ++index) {
        coords.push_back(GetRegister(coord_register + index));
    }
    // 1D.DC in OpenGL the 2nd component is ignored.
    if (depth_compare && !is_array && texture_type == TextureType::Texture1D) {
        coords.push_back(Immediate(0.0f));
    }

    Node array;
    if (is_array) {
        array = GetRegister(array_register);
    }

    std::vector<Node> aoffi;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
    }

    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if
        // lod or bias are used
        dc = GetRegister(parameter_register++);
    }

    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
                          aoffi, bindless_reg);
 }
 // Gathers the operands of a TEXS instruction and forwards them to GetTextureCode.
 //
 // TEXS spreads its operands over the gpr8 and gpr20 register fields: the array index (if any)
 // and the leading coordinates come from gpr8, while the last coordinate, lod/bias and the depth
 // reference are taken from gpr8 or gpr20 depending on which of them are in use. The sampler is
 // always bound (no bindless register is passed) and no texel offsets are supported.
 Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
                             TextureProcessMode process_mode, bool depth_compare, bool is_array) {
    // Every process mode other than None and LZ carries a lod or bias operand
    const bool lod_bias_enabled =
        (process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
    const auto coord_counts = ValidateAndGetCoordinateElement(texture_type, depth_compare, is_array,
                                                              lod_bias_enabled, 4, 4);
    const auto coord_count = std::get<0>(coord_counts);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    // The last coordinate lives in gpr20 for array textures, when there are more than two
    // coordinates, or when neither lod/bias nor depth compare is used; otherwise it directly
    // follows the first coordinate
    const u64 last_coord_register =
        (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    // With more than two coordinates gpr20 holds the last coordinate, pushing lod/bias to gpr20 + 1
    const u32 bias_offset = coord_count > 2 ? 1 : 0;
    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        // A single coordinate is never treated as the "last" one
        const bool last = (i == (coord_count - 1)) && (coord_count > 1);
        coords.push_back(GetRegister(last ? last_coord_register : coord_register + i));
    }
    const Node array = is_array ? GetRegister(array_register) : nullptr;
    Node dc;
    if (depth_compare) {
        // Depth is always stored in the register signaled by gpr20 or in the next register if lod
        // or bias are used
        const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
        dc = GetRegister(depth_register);
    }
    return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
                          {});
 }
 // Builds the four per-element gather operations for TLD4 / TLD4_B.
 //
 // Coordinates are read starting at gpr8 (preceded by the array index for array textures). The
 // operands in the gpr20 range are consumed in a fixed order through parameter_register: the
 // bindless sampler register (if bindless), then either the AOFFI register or the two PTP
 // registers, then the depth reference. AOFFI and PTP are mutually exclusive.
 // Returns four zero immediates when the sampler cannot be resolved.
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
                             bool is_array, bool is_aoffi, bool is_ptp, bool is_bindless) {
    ASSERT_MSG(!(is_aoffi && is_ptp), "AOFFI and PTP can't be enabled at the same time");
    const std::size_t coord_count = GetCoordCount(texture_type);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
    const u64 coord_register = array_register + (is_array ? 1 : 0);
    std::vector<Node> coords;
    for (std::size_t i = 0; i < coord_count; ++i) {
        coords.push_back(GetRegister(coord_register + i));
    }
    // Cursor over the gpr20 operands; every consumer below advances it by post-increment
    u64 parameter_register = instr.gpr20.Value();
    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = depth_compare;
    Node index_var;
    // The cursor only advances on the bindless path, where the first register names the sampler
    const std::optional<SamplerEntry> sampler =
        is_bindless ? GetBindlessSampler(parameter_register++, info, index_var)
                    : GetSampler(instr.sampler, info);
    Node4 values;
    if (!sampler) {
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }
    std::vector<Node> aoffi, ptp;
    if (is_aoffi) {
        aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
    } else if (is_ptp) {
        // Both increments sit inside one braced list, whose elements are evaluated left to right,
        // so two consecutive registers are read in ascending order
        ptp = GetPtpCoordinates(
            {GetRegister(parameter_register++), GetRegister(parameter_register++)});
    }
    Node dc;
    if (depth_compare) {
        dc = GetRegister(parameter_register++);
    }
    // The gathered component is read through the view matching the instruction variant
    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
                                       : Immediate(static_cast<u32>(instr.tld4.component));
    for (u32 element = 0; element < values.size(); ++element) {
        auto coords_copy = coords;
        // NOTE(review): the array node is built from gpr8 even when is_array is false, unlike
        // GetTexCode, which passes a null node in that case - confirm this is intended
        MetaTexture meta{
            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
            index_var};
        values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
    }
    return values;
 }
 // Builds the four per-element texel fetch operations for TLD.
 //
 // The array index (if any) and the coordinates are read from consecutive registers starting at
 // gpr8. The level of detail is read from gpr20 when the process mode is LL and is zero
 // otherwise. Bindless samplers, texel offsets and multisampling are not decoded here.
 Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
    const bool is_array = instr.tld.is_array != 0;
    const bool lod_enabled = instr.tld.GetTextureProcessMode() == TextureProcessMode::LL;
    const std::size_t coord_count = GetCoordCount(instr.tld.texture_type);

    // Walk the gpr8 operands: optional array index first, then the coordinates
    u64 gpr8_cursor = instr.gpr8.Value();
    Node array_node;
    if (is_array) {
        array_node = GetRegister(gpr8_cursor++);
    }
    std::vector<Node> coords;
    coords.reserve(coord_count);
    for (std::size_t remaining = coord_count; remaining > 0; --remaining) {
        coords.push_back(GetRegister(gpr8_cursor++));
    }

    // Walk the gpr20 operands: only the level of detail is consumed for now
    u64 gpr20_cursor = instr.gpr20.Value();
    Node lod;
    if (lod_enabled) {
        lod = GetRegister(gpr20_cursor++);
    } else {
        lod = Immediate(0u);
    }

    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, {});

    // Emit one fetch per destination element, each with its own copy of the coordinates
    Node4 result;
    for (u32 element = 0; element < result.size(); ++element) {
        MetaTexture meta{*sampler, array_node, {}, {}, {}, {}, {}, lod, {}, element, {}};
        result[element] = Operation(OperationCode::TexelFetch, meta, std::vector<Node>(coords));
    }
    return result;
 }
 /// Builds the four per-component TexelFetch nodes for a TLDS instruction.
 ///
 /// @param instr        Instruction being decoded.
 /// @param texture_type Texture dimensionality, used to derive the coordinate count.
 /// @param is_array     Whether the fetch addresses an array texture.
 /// @return One node per result component. When no sampler could be resolved every component
 ///         is a zero immediate instead of dereferencing an empty optional.
 Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is_array) {
    SamplerInfo info;
    info.type = texture_type;
    info.is_array = is_array;
    info.is_shadow = false;
    const std::optional<SamplerEntry> sampler = GetSampler(instr.sampler, info);
    const std::size_t type_coord_count = GetCoordCount(texture_type);
    const bool lod_enabled = instr.tlds.GetTextureProcessMode() == TextureProcessMode::LL;
    const bool aoffi_enabled = instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI);
    // If enabled arrays index is always stored in the gpr8 field
    const u64 array_register = instr.gpr8.Value();
    // if is array gpr20 is used
    const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
    // The last coordinate comes from gpr20 for non-array fetches with more than two
    // coordinates, or with exactly two and no explicit LOD; otherwise it follows the first.
    const u64 last_coord_register =
        ((type_coord_count > 2) || (type_coord_count == 2 && !lod_enabled)) && !is_array
            ? static_cast<u64>(instr.gpr20.Value())
            : coord_register + 1;
    std::vector<Node> coords;
    coords.reserve(type_coord_count);
    for (std::size_t i = 0; i < type_coord_count; ++i) {
        const bool last = (i == (type_coord_count - 1)) && (type_coord_count > 1);
        coords.push_back(
            GetRegister(last && !aoffi_enabled ? last_coord_register : coord_register + i));
    }
    const Node array = is_array ? GetRegister(array_register) : nullptr;
    // When lod is used always is in gpr20
    const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
    std::vector<Node> aoffi;
    if (aoffi_enabled) {
        aoffi = GetAoffiCoordinates(GetRegister(instr.gpr20), type_coord_count, false);
    }
    Node4 values;
    if (!sampler) {
        // Mirrors the missing-sampler handling of the texture gather code in this file:
        // dereferencing an empty optional is undefined behavior, so emit zeros instead.
        for (u32 element = 0; element < values.size(); ++element) {
            values[element] = Immediate(0);
        }
        return values;
    }
    for (u32 element = 0; element < values.size(); ++element) {
        // Operation consumes its coordinate vector, so every component gets its own copy.
        auto coords_copy = coords;
        MetaTexture meta{*sampler, array, {}, aoffi, {}, {}, {}, lod, {}, element, {}};
        values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
    }
    return values;
 }
 /// Computes how many coordinate values a texture operation consumes.
 ///
 /// @return {coordinate count of the texture type, total count including the array index and
 ///         depth-compare reference}. The total is clamped to max_coords when the limits are
 ///         exceeded, and gets one extra slot for non-array 1D depth-compare lookups.
 std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
    TextureType texture_type, bool depth_compare, bool is_array, bool lod_bias_enabled,
    std::size_t max_coords, std::size_t max_inputs) {
    const std::size_t coord_count = GetCoordCount(texture_type);
    std::size_t total_coord_count = coord_count;
    if (is_array) {
        ++total_coord_count;
    }
    if (depth_compare) {
        ++total_coord_count;
    }
    // The LOD/bias value occupies a register but is not a coordinate.
    std::size_t total_reg_count = total_coord_count;
    if (lod_bias_enabled) {
        ++total_reg_count;
    }
    const bool exceeds_limits = total_coord_count > max_coords || total_reg_count > max_inputs;
    if (exceeds_limits) {
        UNIMPLEMENTED_MSG("Unsupported Texture operation");
        total_coord_count = std::min(total_coord_count, max_coords);
    }
    // 1D.DC OpenGL is using a vec3 but 2nd component is ignored later.
    const bool is_plain_1d_shadow =
        depth_compare && !is_array && texture_type == TextureType::Texture1D;
    if (is_plain_1d_shadow) {
        ++total_coord_count;
    }
    return {coord_count, total_coord_count};
 }
 /// Unpacks the per-coordinate texel offsets stored in an AOFFI register.
 ///
 /// Each offset is a small two's complement field; TLD4 uses 6-bit fields spaced 8 bits apart,
 /// the other callers use 4-bit fields spaced 4 bits apart. When the register value can be
 /// tracked to an immediate the offsets are emitted as constants, otherwise they are decoded
 /// with IR operations.
 std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                 bool is_tld4) {
    const std::array field_shifts = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U};
    const u32 field_bits = is_tld4 ? 6 : 4;
    // Raw values at or above the threshold represent negatives; subtracting the range wraps them.
    const s32 sign_threshold = is_tld4 ? 32 : 8;
    const s32 sign_range = is_tld4 ? 64 : 16;
    const u32 field_mask = (1U << field_bits) - 1;
    std::vector<Node> offsets;
    offsets.reserve(coord_count);
    const auto folded =
        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()));
    if (folded) {
        // Constant register contents: decode on the host and emit plain immediates.
        for (std::size_t axis = 0; axis < coord_count; ++axis) {
            s32 decoded = (*folded >> field_shifts[axis]) & field_mask;
            if (decoded >= sign_threshold) {
                decoded -= sign_range;
            }
            offsets.push_back(Immediate(decoded));
        }
        return offsets;
    }
    // Variable access, not supported on AMD.
    LOG_WARNING(HW_GPU,
                "AOFFI constant folding failed, some hardware might have graphical issues");
    for (std::size_t axis = 0; axis < coord_count; ++axis) {
        const Node raw = BitfieldExtract(aoffi_reg, field_shifts[axis], field_bits);
        const Node is_negative =
            Operation(OperationCode::LogicalIGreaterEqual, raw, Immediate(sign_threshold));
        const Node wrapped = Operation(OperationCode::IAdd, raw, Immediate(-sign_range));
        offsets.push_back(Operation(OperationCode::Select, is_negative, wrapped, raw));
    }
    return offsets;
 }
 std::vector<Node> ShaderIR::GetPtpCoordinates(std::array<Node, 2> ptp_regs) {
    static constexpr u32 num_entries = 8;
    std::vector<Node> ptp;
    ptp.reserve(num_entries);
    const auto global_size = static_cast<s64>(global_code.size());
    const std::optional low = TrackImmediate(ptp_regs[0], global_code, global_size);
    const std::optional high = TrackImmediate(ptp_regs[1], global_code, global_size);
    if (!low || !high) {
        for (u32 entry = 0; entry < num_entries; ++entry) {
            const u32 reg = entry / 4;
            const u32 offset = entry % 4;
            const Node value = BitfieldExtract(ptp_regs[reg], offset * 8, 6);
            const Node condition =
                Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(32));
            const Node negative = Operation(OperationCode::IAdd, value, Immediate(-64));
            ptp.push_back(Operation(OperationCode::Select, condition, negative, value));
        }
        return ptp;
    }
    const u64 immediate = (static_cast<u64>(*high) << 32) | static_cast<u64>(*low);
    for (u32 entry = 0; entry < num_entries; ++entry) {
        s32 value = (immediate >> (entry * 8)) & 0b111111;
        if (value >= 32) {
            value -= 64;
        }
        ptp.push_back(Immediate(value));
    }
    return ptp;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@ -1,169 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using std::move;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::VideoType;
 using Tegra::Shader::VmadShr;
 using Tegra::Shader::VmnmxOperation;
 using Tegra::Shader::VmnmxType;
 // Decodes the video instruction stored at program_code[pc] (VMNMX, VMAD or VSETP) and appends
 // the generated operations to bb. Any other opcode is reported as unimplemented.
 // Returns pc unchanged.
 u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // VMNMX is decoded by its own routine and skips the shared operand decode below.
    if (opcode->get().GetId() == OpCode::Id::VMNMX) {
        DecodeVMNMX(bb, instr);
        return pc;
    }
    // Operand A always comes from gpr8, narrowed according to the instruction's video fields.
    const Node op_a =
        GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                        instr.video.type_a, instr.video.byte_height_a);
    // Operand B is either gpr20 narrowed the same way, or a 16-bit immediate.
    const Node op_b = [this, instr] {
        if (instr.video.use_register_b) {
            return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                   instr.video.signed_b, instr.video.type_b,
                                   instr.video.byte_height_b);
        }
        if (instr.video.signed_b) {
            // Going through s16 makes the conversion to u32 sign-extend the immediate.
            const auto imm = static_cast<s16>(instr.alu.GetImm20_16());
            return Immediate(static_cast<u32>(imm));
        } else {
            return Immediate(instr.alu.GetImm20_16());
        }
    }();
    switch (opcode->get().GetId()) {
    case OpCode::Id::VMAD: {
        // value = op_a * op_b + op_c, treated as signed if either source operand is signed.
        const bool result_signed = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        const Node op_c = GetRegister(instr.gpr39);
        Node value = SignedOperation(OperationCode::IMul, result_signed, NO_PRECISE, op_a, op_b);
        value = SignedOperation(OperationCode::IAdd, result_signed, NO_PRECISE, value, op_c);
        // Optional right shift of the result by 7 or 15 bits.
        if (instr.vmad.shr == VmadShr::Shr7 || instr.vmad.shr == VmadShr::Shr15) {
            const Node shift = Immediate(instr.vmad.shr == VmadShr::Shr7 ? 7 : 15);
            value =
                SignedOperation(OperationCode::IArithmeticShiftRight, result_signed, value, shift);
        }
        SetInternalFlagsFromInteger(bb, value, instr.generates_cc);
        SetRegister(bb, instr.gpr0, value);
        break;
    }
    case OpCode::Id::VSETP: {
        // We can't use the constant predicate as destination.
        ASSERT(instr.vsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
        const bool sign = instr.video.signed_a == 1 || instr.video.signed_b == 1;
        // Compare the two operands, then combine the outcome with predicate pred39.
        const Node first_pred = GetPredicateComparisonInteger(instr.vsetp.cond, sign, op_a, op_b);
        const Node second_pred = GetPredicate(instr.vsetp.pred39, false);
        const OperationCode combiner = GetPredicateCombiner(instr.vsetp.op);
        // Set the primary predicate to the result of Predicate OP SecondPredicate
        SetPredicate(bb, instr.vsetp.pred3, Operation(combiner, first_pred, second_pred));
        if (instr.vsetp.pred0 != static_cast<u64>(Pred::UnusedIndex)) {
            // Set the secondary predicate to the result of !Predicate OP SecondPredicate,
            // if enabled
            const Node negate_pred = Operation(OperationCode::LogicalNegate, first_pred);
            SetPredicate(bb, instr.vsetp.pred0, Operation(combiner, negate_pred, second_pred));
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled video instruction: {}", opcode->get().GetName());
    }
    return pc;
 }
 // Narrows a 32-bit register value to the part selected by a video instruction.
 // Chunked operands select a 16-bit half through type; non-chunked operands select the byte
 // numbered byte_height. Note that is_signed is accepted but not consulted here.
 Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
                                u64 byte_height) {
    if (is_chunk) {
        // Every case returns, so control never leaves this switch.
        switch (type) {
        case VideoType::Size16_Low:
            return BitfieldExtract(op, 0, 16);
        case VideoType::Size16_High:
            return BitfieldExtract(op, 16, 16);
        case VideoType::Size32:
            // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
            // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
            UNIMPLEMENTED();
            return Immediate(0);
        case VideoType::Invalid:
            UNREACHABLE_MSG("Invalid instruction encoding");
            return Immediate(0);
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }
    // Byte operand: extract eight bits starting at the selected byte.
    const auto bit_offset = static_cast<u32>(byte_height * 8);
    return BitfieldExtract(op, bit_offset, 8);
 }
 // Decodes VMNMX: takes the min or max of gpr8 and gpr20, then combines that result with
 // gpr39 using the instruction's secondary operation and stores it in gpr0.
 // Only the register form with 32-bit sources of matching signedness, no saturation and no
 // condition code output is handled.
 void ShaderIR::DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr) {
    UNIMPLEMENTED_IF(!instr.vmnmx.is_op_b_register);
    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatA() != VmnmxType::Bits32);
    UNIMPLEMENTED_IF(instr.vmnmx.SourceFormatB() != VmnmxType::Bits32);
    UNIMPLEMENTED_IF(instr.vmnmx.is_src_a_signed != instr.vmnmx.is_src_b_signed);
    UNIMPLEMENTED_IF(instr.vmnmx.sat);
    UNIMPLEMENTED_IF(instr.generates_cc);
    Node source_a = GetRegister(instr.gpr8);
    Node source_b = GetRegister(instr.gpr20);
    Node source_c = GetRegister(instr.gpr39);
    const bool first_stage_signed = instr.vmnmx.is_src_a_signed; // Stubbed
    const bool second_stage_signed = instr.vmnmx.is_dest_signed;
    // First stage: min/max of the two sources.
    const OperationCode first_stage_op =
        instr.vmnmx.mx ? OperationCode::IMax : OperationCode::IMin;
    Node result =
        SignedOperation(first_stage_op, first_stage_signed, move(source_a), move(source_b));
    // Second stage: merge into, accumulate with, or min/max against the third source.
    switch (instr.vmnmx.operation) {
    case VmnmxOperation::Nop:
        break;
    case VmnmxOperation::Mrg_8B0:
        result = BitfieldInsert(move(source_c), move(result), 0, 8);
        break;
    case VmnmxOperation::Mrg_8B2:
        result = BitfieldInsert(move(source_c), move(result), 16, 8);
        break;
    case VmnmxOperation::Mrg_16L:
        result = BitfieldInsert(move(source_c), move(result), 0, 16);
        break;
    case VmnmxOperation::Mrg_16H:
        result = BitfieldInsert(move(source_c), move(result), 16, 16);
        break;
    case VmnmxOperation::Acc:
        result = Operation(OperationCode::IAdd, move(result), move(source_c));
        break;
    case VmnmxOperation::Min:
        result = SignedOperation(OperationCode::IMin, second_stage_signed, move(result),
                                 move(source_c));
        break;
    case VmnmxOperation::Max:
        result = SignedOperation(OperationCode::IMax, second_stage_signed, move(result),
                                 move(source_c));
        break;
    default:
        UNREACHABLE();
        break;
    }
    SetRegister(bb, instr.gpr0, move(result));
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@ -1,117 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::ShuffleOperation;
 using Tegra::Shader::VoteOperation;
 namespace {
 // Maps a VOTE sub-operation onto the IR operation implementing it. Unknown values are
 // reported and treated as VoteAll.
 OperationCode GetOperationCode(VoteOperation vote_op) {
    if (vote_op == VoteOperation::All) {
        return OperationCode::VoteAll;
    }
    if (vote_op == VoteOperation::Any) {
        return OperationCode::VoteAny;
    }
    if (vote_op == VoteOperation::Eq) {
        return OperationCode::VoteEqual;
    }
    UNREACHABLE_MSG("Invalid vote operation={}", vote_op);
    return OperationCode::VoteAll;
 }
 } // Anonymous namespace
 // Decodes the warp instruction stored at program_code[pc] (VOTE, SHFL or FSWZADD) and appends
 // the generated operations to bb. Any other opcode is reported as unimplemented.
 // Also sets uses_warps, regardless of which opcode was decoded. Returns pc unchanged.
 u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    // Signal the backend that this shader uses warp instructions.
    uses_warps = true;
    switch (opcode->get().GetId()) {
    case OpCode::Id::VOTE: {
        // gpr0 receives the ballot of the (optionally negated) predicate, dest_pred the vote.
        const Node value = GetPredicate(instr.vote.value, instr.vote.negate_value != 0);
        const Node active = Operation(OperationCode::BallotThread, value);
        const Node vote = Operation(GetOperationCode(instr.vote.operation), value);
        SetRegister(bb, instr.gpr0, active);
        SetPredicate(bb, instr.vote.dest_pred, vote);
        break;
    }
    case OpCode::Id::SHFL: {
        // Mask and index each come from an immediate or from a register (gpr39 / gpr20).
        Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
                                           : GetRegister(instr.gpr39);
        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
                                             : GetRegister(instr.gpr20);
        Node thread_id = Operation(OperationCode::ThreadId);
        // The low five bits of the mask are the clamp value; a second field is extracted
        // with BitfieldExtract(mask, 8, 16) as the segment mask.
        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
        Node seg_mask = BitfieldExtract(mask, 8, 16);
        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
        // Lowest and highest thread ids used for the bounds check below.
        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
        Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
                                       Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
        // Source thread selection depends on the shuffle mode.
        Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
            switch (instr.shfl.operation) {
            case ShuffleOperation::Idx:
                return Operation(OperationCode::IBitwiseOr,
                                 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
                                 min_thread_id);
            case ShuffleOperation::Down:
                return Operation(OperationCode::IAdd, thread_id, index);
            case ShuffleOperation::Up:
                return Operation(OperationCode::IAdd, thread_id,
                                 Operation(OperationCode::INegate, index));
            case ShuffleOperation::Bfly:
                return Operation(OperationCode::IBitwiseXor, thread_id, index);
            }
            UNREACHABLE();
            return Immediate(0U);
        }();
        // Up is checked against the lower bound; every other mode against the upper bound.
        Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
            if (instr.shfl.operation == ShuffleOperation::Up) {
                return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
            } else {
                return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
            }
        }();
        // pred48 reports whether the source thread was in range; gpr0 gets the shuffled gpr8.
        SetPredicate(bb, instr.shfl.pred48, in_bounds);
        SetRegister(
            bb, instr.gpr0,
            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
        break;
    }
    case OpCode::Id::FSWZADD: {
        UNIMPLEMENTED_IF(instr.fswzadd.ndv);
        Node op_a = GetRegister(instr.gpr8);
        Node op_b = GetRegister(instr.gpr20);
        // The swizzle field is forwarded to the backend as an immediate operand.
        Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName());
        break;
    }
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@ -1,156 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::PredCondition;
 // Decodes the XMAD instruction stored at program_code[pc] and appends the generated
 // operations to bb. The result written to gpr0 is built from a 16-bit half of operand A
 // multiplied by a 16-bit half of operand B, plus operand C adjusted by the XMAD mode, with an
 // optional merge of operand B into the upper half. Signed operands and condition code
 // generation are reported as unimplemented. Returns pc unchanged.
 u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);
    UNIMPLEMENTED_IF(instr.xmad.sign_a);
    UNIMPLEMENTED_IF(instr.xmad.sign_b);
    UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                         "Condition codes generation in XMAD is not implemented");
    Node op_a = GetRegister(instr.gpr8);
    // TODO(bunnei): Needs to be fixed once op_a or op_b is signed
    UNIMPLEMENTED_IF(instr.xmad.sign_a != instr.xmad.sign_b);
    const bool is_signed_a = instr.xmad.sign_a == 1;
    const bool is_signed_b = instr.xmad.sign_b == 1;
    const bool is_signed_c = is_signed_a;
    // Each encoding stores its flags and its B/C operands in different places; gather them
    // into one tuple: {merge, product shift left, high half of B, mode, operand B, operand C}.
    auto [is_merge, is_psl, is_high_b, mode, op_b_binding,
          op_c] = [&]() -> std::tuple<bool, bool, bool, Tegra::Shader::XmadMode, Node, Node> {
        switch (opcode->get().GetId()) {
        case OpCode::Id::XMAD_CR:
            return {instr.xmad.merge_56,
                    instr.xmad.product_shift_left_second,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                    GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RR:
            return {instr.xmad.merge_37, instr.xmad.product_shift_left, instr.xmad.high_b_rr,
                    instr.xmad.mode,     GetRegister(instr.gpr20),      GetRegister(instr.gpr39)};
        case OpCode::Id::XMAD_RC:
            return {false,
                    false,
                    instr.xmad.high_b,
                    instr.xmad.mode_cbf,
                    GetRegister(instr.gpr39),
                    GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset())};
        case OpCode::Id::XMAD_IMM:
            return {instr.xmad.merge_37,
                    instr.xmad.product_shift_left,
                    false,
                    instr.xmad.mode,
                    Immediate(static_cast<u32>(instr.xmad.imm20_16)),
                    GetRegister(instr.gpr39)};
        default:
            UNIMPLEMENTED_MSG("Unhandled XMAD instruction: {}", opcode->get().GetName());
            return {false, false, false, Tegra::Shader::XmadMode::None, Immediate(0), Immediate(0)};
        }
    }();
    // Select the 16-bit half of operand A that takes part in the multiplication.
    op_a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(op_a),
                           instr.xmad.high_a ? Immediate(16) : Immediate(0), Immediate(16));
    // Keep the full operand B around: the CBcc mode and the merge step use it unextracted.
    const Node original_b = op_b_binding;
    const Node op_b =
        SignedOperation(OperationCode::IBitfieldExtract, is_signed_b, std::move(op_b_binding),
                        is_high_b ? Immediate(16) : Immediate(0), Immediate(16));
    // sign_a and sign_b were already required to match above, so either one can be used here.
    Node product = SignedOperation(OperationCode::IMul, is_signed_a, op_a, op_b);
    if (is_psl) {
        product =
            SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_a, product, Immediate(16));
    }
    // Route the product through temporary 0 and continue with the temporary.
    SetTemporary(bb, 0, product);
    product = GetTemporary(0);
    Node original_c = op_c;
    const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
    // Adjust operand C according to the XMAD mode.
    op_c = [&] {
        switch (set_mode) {
        case Tegra::Shader::XmadMode::None:
            return original_c;
        case Tegra::Shader::XmadMode::CLo:
            return BitfieldExtract(std::move(original_c), 0, 16);
        case Tegra::Shader::XmadMode::CHi:
            return BitfieldExtract(std::move(original_c), 16, 16);
        case Tegra::Shader::XmadMode::CBcc: {
            // C plus the full operand B shifted into the upper half.
            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
                                             original_b, Immediate(16));
            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
                                   std::move(shifted_b));
        }
        case Tegra::Shader::XmadMode::CSfu: {
            // C is left untouched when either factor is zero; otherwise 65536 is subtracted
            // once for every factor that has bit 31 set.
            const Node comp_a =
                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_a, op_a, Immediate(0));
            const Node comp_b =
                GetPredicateComparisonInteger(PredCondition::EQ, is_signed_b, op_b, Immediate(0));
            const Node comp = Operation(OperationCode::LogicalOr, comp_a, comp_b);
            const Node comp_minus_a = GetPredicateComparisonInteger(
                PredCondition::NE, is_signed_a,
                SignedOperation(OperationCode::IBitwiseAnd, is_signed_a, op_a,
                                Immediate(0x80000000)),
                Immediate(0));
            const Node comp_minus_b = GetPredicateComparisonInteger(
                PredCondition::NE, is_signed_b,
                SignedOperation(OperationCode::IBitwiseAnd, is_signed_b, op_b,
                                Immediate(0x80000000)),
                Immediate(0));
            Node new_c = Operation(
                OperationCode::Select, comp_minus_a,
                SignedOperation(OperationCode::IAdd, is_signed_c, original_c, Immediate(-65536)),
                original_c);
            new_c = Operation(
                OperationCode::Select, comp_minus_b,
                SignedOperation(OperationCode::IAdd, is_signed_c, new_c, Immediate(-65536)),
                std::move(new_c));
            return Operation(OperationCode::Select, comp, original_c, std::move(new_c));
        }
        default:
            UNREACHABLE();
            return Immediate(0);
        }
    }();
    // Route the adjusted operand C through temporary 1.
    SetTemporary(bb, 1, op_c);
    op_c = GetTemporary(1);
    // TODO(Rodrigo): Use an appropriate sign for this operation
    Node sum = SignedOperation(OperationCode::IAdd, is_signed_a, product, std::move(op_c));
    SetTemporary(bb, 2, sum);
    sum = GetTemporary(2);
    if (is_merge) {
        // Merge: the low 16 bits of the sum combined with operand B shifted into the upper half.
        const Node a = SignedOperation(OperationCode::IBitfieldExtract, is_signed_a, std::move(sum),
                                       Immediate(0), Immediate(16));
        const Node b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b, original_b,
                                       Immediate(16));
        sum = SignedOperation(OperationCode::IBitwiseOr, is_signed_a, a, b);
    }
    SetInternalFlagsFromInteger(bb, sum, instr.generates_cc);
    SetRegister(bb, instr.gpr0, std::move(sum));
    return pc;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/expr.cpp
+++ b/src/video_core/shader/expr.cpp
@ -1,93 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <memory>
 #include <variant>
 #include "video_core/shader/expr.h"
 namespace VideoCommon::Shader {
 namespace {
 bool ExprIsBoolean(const Expr& expr) {
    return std::holds_alternative<ExprBoolean>(*expr);
 }
 bool ExprBooleanGet(const Expr& expr) {
    return std::get_if<ExprBoolean>(expr.get())->value;
 }
 } // Anonymous namespace
 // Structural equality: the pointed-to operands must match pairwise.
 bool ExprAnd::operator==(const ExprAnd& b) const {
    if (!(*operand1 == *b.operand1)) {
        return false;
    }
    return *operand2 == *b.operand2;
 }
 // Negation of the structural equality above.
 bool ExprAnd::operator!=(const ExprAnd& b) const {
    const bool equal = (*this == b);
    return !equal;
 }
 // Structural equality: the pointed-to operands must match pairwise.
 bool ExprOr::operator==(const ExprOr& b) const {
    if (!(*operand1 == *b.operand1)) {
        return false;
    }
    return *operand2 == *b.operand2;
 }
 // Negation of the structural equality above.
 bool ExprOr::operator!=(const ExprOr& b) const {
    const bool equal = (*this == b);
    return !equal;
 }
 // Two negations are equal when the expressions they negate are equal.
 bool ExprNot::operator==(const ExprNot& b) const {
    const ExprData& mine = *operand1;
    const ExprData& theirs = *b.operand1;
    return mine == theirs;
 }
 // Negation of the structural equality above.
 bool ExprNot::operator!=(const ExprNot& b) const {
    const bool equal = (*this == b);
    return !equal;
 }
 // Builds the negation of an expression. Negating a negation yields the inner expression
 // instead of stacking a second ExprNot on top.
 Expr MakeExprNot(Expr first) {
    if (const auto* negation = std::get_if<ExprNot>(first.get())) {
        return negation->operand1;
    }
    return MakeExpr<ExprNot>(std::move(first));
 }
 // Builds the conjunction of two expressions, folding constant booleans:
 // a true operand yields the other operand, a false operand yields itself.
 Expr MakeExprAnd(Expr first, Expr second) {
    if (ExprIsBoolean(first)) {
        if (ExprBooleanGet(first)) {
            return second;
        }
        return first;
    }
    if (ExprIsBoolean(second)) {
        if (ExprBooleanGet(second)) {
            return first;
        }
        return second;
    }
    return MakeExpr<ExprAnd>(std::move(first), std::move(second));
 }
 // Builds the disjunction of two expressions, folding constant booleans:
 // a true operand yields itself, a false operand yields the other operand.
 Expr MakeExprOr(Expr first, Expr second) {
    if (ExprIsBoolean(first)) {
        if (ExprBooleanGet(first)) {
            return first;
        }
        return second;
    }
    if (ExprIsBoolean(second)) {
        if (ExprBooleanGet(second)) {
            return second;
        }
        return first;
    }
    return MakeExpr<ExprOr>(std::move(first), std::move(second));
 }
 bool ExprAreEqual(const Expr& first, const Expr& second) {
    return (*first) == (*second);
 }
 // Tells whether one expression is the direct negation of the other, i.e. one of them is an
 // ExprNot whose operand equals the other expression.
 bool ExprAreOpposite(const Expr& first, const Expr& second) {
    if (const auto* negation = std::get_if<ExprNot>(first.get())) {
        return ExprAreEqual(negation->operand1, second);
    }
    if (const auto* negation = std::get_if<ExprNot>(second.get())) {
        return ExprAreEqual(negation->operand1, first);
    }
    return false;
 }
 // True only for a constant boolean expression whose value is true.
 bool ExprIsTrue(const Expr& first) {
    return ExprIsBoolean(first) && ExprBooleanGet(first);
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/expr.h
+++ b/src/video_core/shader/expr.h
@ -1,156 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <memory>
 #include <variant>
 #include "video_core/engines/shader_bytecode.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::ConditionCode;
 using Tegra::Shader::Pred;
 // Forward declarations of every expression node kind; they are needed because ExprData is
 // declared before the node classes are defined.
 class ExprAnd;
 class ExprBoolean;
 class ExprCondCode;
 class ExprGprEqual;
 class ExprNot;
 class ExprOr;
 class ExprPredicate;
 class ExprVar;
 // A single expression node: exactly one of the node kinds declared above.
 using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
                               ExprBoolean, ExprGprEqual>;
 // Shared handle to an expression node; compound nodes hold their operands through it.
 using Expr = std::shared_ptr<ExprData>;
 // Expression node for the logical AND of two sub-expressions.
 class ExprAnd final {
 public:
    // Sub-expressions, in the order they were passed to the constructor.
    Expr operand1;
    Expr operand2;
    explicit ExprAnd(Expr a, Expr b)
        : operand1{std::move(a)},
          operand2{std::move(b)} {}
    // Structural comparison, defined out of line.
    bool operator==(const ExprAnd& b) const;
    bool operator!=(const ExprAnd& b) const;
 };
 // Expression node for the logical OR of two sub-expressions.
 class ExprOr final {
 public:
    // Sub-expressions, in the order they were passed to the constructor.
    Expr operand1;
    Expr operand2;
    explicit ExprOr(Expr a, Expr b)
        : operand1{std::move(a)},
          operand2{std::move(b)} {}
    // Structural comparison, defined out of line.
    bool operator==(const ExprOr& b) const;
    bool operator!=(const ExprOr& b) const;
 };
 // Expression node for the logical negation of a sub-expression.
 class ExprNot final {
 public:
    // The negated sub-expression.
    Expr operand1;
    explicit ExprNot(Expr a)
        : operand1{std::move(a)} {}
    // Structural comparison, defined out of line.
    bool operator==(const ExprNot& b) const;
    bool operator!=(const ExprNot& b) const;
 };
 // Expression node referring to a variable by index.
 class ExprVar final {
 public:
    // Index of the referenced variable.
    u32 var_index;
    explicit ExprVar(u32 index)
        : var_index{index} {}
    bool operator==(const ExprVar& b) const {
        return b.var_index == var_index;
    }
    bool operator!=(const ExprVar& b) const {
        return !(*this == b);
    }
 };
 // Expression node referring to a predicate by index.
 class ExprPredicate final {
 public:
    // Index of the referenced predicate.
    u32 predicate;
    explicit ExprPredicate(u32 predicate_)
        : predicate{predicate_} {}
    bool operator==(const ExprPredicate& b) const {
        return b.predicate == predicate;
    }
    bool operator!=(const ExprPredicate& b) const {
        return !(*this == b);
    }
 };
 // Expression node holding a condition code.
 class ExprCondCode final {
 public:
    // The stored condition code.
    ConditionCode cc;
    explicit ExprCondCode(ConditionCode condition_code)
        : cc{condition_code} {}
    bool operator==(const ExprCondCode& b) const {
        return b.cc == cc;
    }
    bool operator!=(const ExprCondCode& b) const {
        return !(*this == b);
    }
 };
 // Expression node holding a constant boolean.
 class ExprBoolean final {
 public:
    // The constant itself.
    bool value;
    explicit ExprBoolean(bool val)
        : value{val} {}
    bool operator==(const ExprBoolean& b) const {
        return b.value == value;
    }
    bool operator!=(const ExprBoolean& b) const {
        return !(*this == b);
    }
 };
 // Expression node pairing a general purpose register index with a value.
 class ExprGprEqual final {
 public:
    // Register index and the value associated with it.
    u32 gpr;
    u32 value;
    explicit ExprGprEqual(u32 gpr_, u32 value_)
        : gpr{gpr_},
          value{value_} {}
    bool operator==(const ExprGprEqual& b) const {
        if (gpr != b.gpr) {
            return false;
        }
        return value == b.value;
    }
    bool operator!=(const ExprGprEqual& b) const {
        return !(*this == b);
    }
 };
 // Creates a shared expression node of kind T, forwarding the arguments to T's constructor.
 // T must be convertible to ExprData, i.e. be one of the variant's node kinds.
 template <typename T, typename... Args>
 Expr MakeExpr(Args&&... args) {
    static_assert(std::is_convertible_v<T, ExprData>);
    T node(std::forward<Args>(args)...);
    return std::make_shared<ExprData>(std::move(node));
 }
 // Returns true when the two pointed-to expression nodes compare equal.
 bool ExprAreEqual(const Expr& first, const Expr& second);
 // Returns true when one expression is an ExprNot whose operand equals the other expression.
 bool ExprAreOpposite(const Expr& first, const Expr& second);
 // Negates an expression; negating an ExprNot returns its operand instead of nesting.
 Expr MakeExprNot(Expr first);
 // Builds first AND second, simplifying when either operand is a constant boolean.
 Expr MakeExprAnd(Expr first, Expr second);
 // Builds first OR second, simplifying when either operand is a constant boolean.
 Expr MakeExprOr(Expr first, Expr second);
 // Returns true only when the expression is a constant boolean holding true.
 bool ExprIsTrue(const Expr& first);
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@ -1,76 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <cstddef>
 #include <boost/container_hash/hash.hpp>
 #include "common/common_types.h"
 #include "core/core.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 /// Computes the address of a shader stage program: the code base address reported by the
 /// registers plus the offset configured for the requested program.
 GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
    const auto& regs = maxwell3d.regs;
    const auto program_index = static_cast<std::size_t>(program);
    const auto& stage_config = regs.shader_config[program_index];
    return regs.code_address.CodeAddress() + stage_config.offset;
 }
 /// Tells whether the instruction at `offset` is a scheduler instruction, counting from
 /// `main_offset`. Every fourth slot, starting with the first one, holds one.
 bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
    constexpr std::size_t sched_period = 4;
    const std::size_t relative = offset - main_offset;
    const std::size_t slot = relative % sched_period;
    return slot == 0;
 }
 std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
    // This is the encoded version of BRA that jumps to itself. All Nvidia
    // shaders end with one.
    static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
    static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
    const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
    std::size_t offset = start_offset;
    while (offset < program.size()) {
        const u64 instruction = program[offset];
        if (!IsSchedInstruction(offset, start_offset)) {
            if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
                // End on Maxwell's "nop" instruction
                break;
            }
            if (instruction == 0) {
                break;
            }
        }
        ++offset;
    }
    // The last instruction is included in the program size
    return std::min(offset + 1, program.size());
 }
 /// Reads MAX_PROGRAM_LENGTH words starting at `gpu_addr` and trims the result to the size
 /// computed by CalculateProgramSize. `host_ptr` is only checked against null; when it is
 /// null the untrimmed, value-initialized buffer is returned.
 ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
                          const u8* host_ptr, bool is_compute) {
    ProgramCode program(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return program; });

    const std::size_t size_in_bytes = program.size() * sizeof(u64);
    memory_manager.ReadBlockUnsafe(gpu_addr, program.data(), size_in_bytes);

    const std::size_t used_words = CalculateProgramSize(program, is_compute);
    program.resize(used_words);
    return program;
 }
 /// Hashes `code`, additionally mixing in the hash of `code_b` when `is_a` is set.
 /// `shader_type` does not take part in the hash.
 u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
                        const ProgramCode& code_b) {
    std::size_t seed = boost::hash_value(code);
    if (!is_a) {
        return static_cast<u64>(seed);
    }
    // VertexA programs are made of two programs
    boost::hash_combine(seed, boost::hash_value(code_b));
    return static_cast<u64>(seed);
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@ -1,43 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <cstddef>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 // Forward declaration: this header only refers to MemoryManager by reference.
 namespace Tegra {
 class MemoryManager;
 }
 namespace VideoCommon::Shader {
 /// Shader program code stored as a sequence of 64-bit words
 using ProgramCode = std::vector<u64>;
 /// Index into ProgramCode where CalculateProgramSize starts scanning non-compute programs
 constexpr u32 STAGE_MAIN_OFFSET = 10;
 /// Index into ProgramCode where CalculateProgramSize starts scanning compute programs
 constexpr u32 KERNEL_MAIN_OFFSET = 0;
 /// Gets the address for the specified shader stage program
 GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
 /// Gets if the current instruction offset is a scheduler instruction
 /// (offsets are taken relative to main_offset)
 bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
 /// Calculates the size of a program stream, in ProgramCode elements
 std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
 /// Gets the shader program code from memory for the specified address
 ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
                          const u8* host_ptr, bool is_compute);
 /// Hashes one (or two) program streams; code_b is only hashed when is_a is true
 u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
                        const ProgramCode& code_b = {});
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@ -1,701 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <array>
 #include <cstddef>
 #include <memory>
 #include <optional>
 #include <string>
 #include <tuple>
 #include <utility>
 #include <variant>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 namespace VideoCommon::Shader {
 /// Codes identifying the operation carried by an OperationNode. The trailing comment of each
 /// enumerator lists its operands and result type. `Amount` is the last enumerator, so its value
 /// equals the number of operation codes declared before it.
 enum class OperationCode {
    Assign, /// (float& dest, float src) -> void
    Select, /// (MetaArithmetic, bool pred, float a, float b) -> float
    FAdd,          /// (MetaArithmetic, float a, float b) -> float
    FMul,          /// (MetaArithmetic, float a, float b) -> float
    FDiv,          /// (MetaArithmetic, float a, float b) -> float
    FFma,          /// (MetaArithmetic, float a, float b, float c) -> float
    FNegate,       /// (MetaArithmetic, float a) -> float
    FAbsolute,     /// (MetaArithmetic, float a) -> float
    FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
    FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
    FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
    FMin,          /// (MetaArithmetic, float a, float b) -> float
    FMax,          /// (MetaArithmetic, float a, float b) -> float
    FCos,          /// (MetaArithmetic, float a) -> float
    FSin,          /// (MetaArithmetic, float a) -> float
    FExp2,         /// (MetaArithmetic, float a) -> float
    FLog2,         /// (MetaArithmetic, float a) -> float
    FInverseSqrt,  /// (MetaArithmetic, float a) -> float
    FSqrt,         /// (MetaArithmetic, float a) -> float
    FRoundEven,    /// (MetaArithmetic, float a) -> float
    FFloor,        /// (MetaArithmetic, float a) -> float
    FCeil,         /// (MetaArithmetic, float a) -> float
    FTrunc,        /// (MetaArithmetic, float a) -> float
    FCastInteger,  /// (MetaArithmetic, int a) -> float
    FCastUInteger, /// (MetaArithmetic, uint a) -> float
    FSwizzleAdd,   /// (float a, float b, uint mask) -> float
    IAdd,                  /// (MetaArithmetic, int a, int b) -> int
    IMul,                  /// (MetaArithmetic, int a, int b) -> int
    IDiv,                  /// (MetaArithmetic, int a, int b) -> int
    INegate,               /// (MetaArithmetic, int a) -> int
    IAbsolute,             /// (MetaArithmetic, int a) -> int
    IMin,                  /// (MetaArithmetic, int a, int b) -> int
    IMax,                  /// (MetaArithmetic, int a, int b) -> int
    ICastFloat,            /// (MetaArithmetic, float a) -> int
    ICastUnsigned,         /// (MetaArithmetic, uint a) -> int
    ILogicalShiftLeft,     /// (MetaArithmetic, int a, uint b) -> int
    ILogicalShiftRight,    /// (MetaArithmetic, int a, uint b) -> int
    IArithmeticShiftRight, /// (MetaArithmetic, int a, uint b) -> int
    IBitwiseAnd,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseOr,            /// (MetaArithmetic, int a, int b) -> int
    IBitwiseXor,           /// (MetaArithmetic, int a, int b) -> int
    IBitwiseNot,           /// (MetaArithmetic, int a) -> int
    IBitfieldInsert,       /// (MetaArithmetic, int base, int insert, int offset, int bits) -> int
    IBitfieldExtract,      /// (MetaArithmetic, int value, int offset, int bits) -> int
    IBitCount,             /// (MetaArithmetic, int) -> int
    IBitMSB,               /// (MetaArithmetic, int) -> int
    UAdd,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMul,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UDiv,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMin,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UMax,                  /// (MetaArithmetic, uint a, uint b) -> uint
    UCastFloat,            /// (MetaArithmetic, float a) -> uint
    UCastSigned,           /// (MetaArithmetic, int a) -> uint
    ULogicalShiftLeft,     /// (MetaArithmetic, uint a, uint b) -> uint
    ULogicalShiftRight,    /// (MetaArithmetic, uint a, uint b) -> uint
    UArithmeticShiftRight, /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseAnd,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseOr,            /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseXor,           /// (MetaArithmetic, uint a, uint b) -> uint
    UBitwiseNot,           /// (MetaArithmetic, uint a) -> uint
    UBitfieldInsert,  /// (MetaArithmetic, uint base, uint insert, int offset, int bits) -> uint
    UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int bits) -> uint
    UBitCount,        /// (MetaArithmetic, uint) -> uint
    UBitMSB,          /// (MetaArithmetic, uint) -> uint
    HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
    HFma,       /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
    HAbsolute,  /// (f16vec2 a) -> f16vec2
    HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
    HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
    HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
    HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
    HMergeF32,  /// (f16vec2 src) -> float
    HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HMergeH1,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
    HPack2,     /// (float a, float b) -> f16vec2
    LogicalAssign, /// (bool& dst, bool src) -> void
    LogicalAnd,    /// (bool a, bool b) -> bool
    LogicalOr,     /// (bool a, bool b) -> bool
    LogicalXor,    /// (bool a, bool b) -> bool
    LogicalNegate, /// (bool a) -> bool
    LogicalPick2,  /// (bool2 pair, uint index) -> bool
    LogicalAnd2,   /// (bool2 a) -> bool
    LogicalFOrdLessThan,       /// (float a, float b) -> bool
    LogicalFOrdEqual,          /// (float a, float b) -> bool
    LogicalFOrdLessEqual,      /// (float a, float b) -> bool
    LogicalFOrdGreaterThan,    /// (float a, float b) -> bool
    LogicalFOrdNotEqual,       /// (float a, float b) -> bool
    LogicalFOrdGreaterEqual,   /// (float a, float b) -> bool
    LogicalFOrdered,           /// (float a, float b) -> bool
    LogicalFUnordered,         /// (float a, float b) -> bool
    LogicalFUnordLessThan,     /// (float a, float b) -> bool
    LogicalFUnordEqual,        /// (float a, float b) -> bool
    LogicalFUnordLessEqual,    /// (float a, float b) -> bool
    LogicalFUnordGreaterThan,  /// (float a, float b) -> bool
    LogicalFUnordNotEqual,     /// (float a, float b) -> bool
    LogicalFUnordGreaterEqual, /// (float a, float b) -> bool
    LogicalILessThan,     /// (int a, int b) -> bool
    LogicalIEqual,        /// (int a, int b) -> bool
    LogicalILessEqual,    /// (int a, int b) -> bool
    LogicalIGreaterThan,  /// (int a, int b) -> bool
    LogicalINotEqual,     /// (int a, int b) -> bool
    LogicalIGreaterEqual, /// (int a, int b) -> bool
    LogicalULessThan,     /// (uint a, uint b) -> bool
    LogicalUEqual,        /// (uint a, uint b) -> bool
    LogicalULessEqual,    /// (uint a, uint b) -> bool
    LogicalUGreaterThan,  /// (uint a, uint b) -> bool
    LogicalUNotEqual,     /// (uint a, uint b) -> bool
    LogicalUGreaterEqual, /// (uint a, uint b) -> bool
    LogicalAddCarry, /// (uint a, uint b) -> bool
    Logical2HLessThan,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HEqual,               /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessEqual,           /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterThan,         /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HNotEqual,            /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqual,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessThanWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HEqualWithNan,        /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HLessEqualWithNan,    /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterThanWithNan,  /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HNotEqualWithNan,     /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Logical2HGreaterEqualWithNan, /// (MetaHalfArithmetic, f16vec2 a, f16vec2) -> bool2
    Texture,                /// (MetaTexture, float[N] coords) -> float4
    TextureLod,             /// (MetaTexture, float[N] coords) -> float4
    TextureGather,          /// (MetaTexture, float[N] coords) -> float4
    TextureQueryDimensions, /// (MetaTexture, float a) -> float4
    TextureQueryLod,        /// (MetaTexture, float[N] coords) -> float4
    TexelFetch,             /// (MetaTexture, int[N], int) -> float4
    TextureGradient,        /// (MetaTexture, float[N] coords, float[N*2] derivates) -> float4
    ImageLoad,  /// (MetaImage, int[N] coords) -> void
    ImageStore, /// (MetaImage, int[N] coords) -> void
    AtomicImageAdd,      /// (MetaImage, int[N] coords) -> void
    AtomicImageAnd,      /// (MetaImage, int[N] coords) -> void
    AtomicImageOr,       /// (MetaImage, int[N] coords) -> void
    AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
    AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
    AtomicUExchange, /// (memory, uint) -> uint
    AtomicUAdd,      /// (memory, uint) -> uint
    AtomicUMin,      /// (memory, uint) -> uint
    AtomicUMax,      /// (memory, uint) -> uint
    AtomicUAnd,      /// (memory, uint) -> uint
    AtomicUOr,       /// (memory, uint) -> uint
    AtomicUXor,      /// (memory, uint) -> uint
    AtomicIExchange, /// (memory, int) -> int
    AtomicIAdd,      /// (memory, int) -> int
    AtomicIMin,      /// (memory, int) -> int
    AtomicIMax,      /// (memory, int) -> int
    AtomicIAnd,      /// (memory, int) -> int
    AtomicIOr,       /// (memory, int) -> int
    AtomicIXor,      /// (memory, int) -> int
    ReduceUAdd, /// (memory, uint) -> void
    ReduceUMin, /// (memory, uint) -> void
    ReduceUMax, /// (memory, uint) -> void
    ReduceUAnd, /// (memory, uint) -> void
    ReduceUOr,  /// (memory, uint) -> void
    ReduceUXor, /// (memory, uint) -> void
    ReduceIAdd, /// (memory, int) -> void
    ReduceIMin, /// (memory, int) -> void
    ReduceIMax, /// (memory, int) -> void
    ReduceIAnd, /// (memory, int) -> void
    ReduceIOr,  /// (memory, int) -> void
    ReduceIXor, /// (memory, int) -> void
    Branch,         /// (uint branch_target) -> void
    BranchIndirect, /// (uint branch_target) -> void
    PushFlowStack,  /// (uint branch_target) -> void
    PopFlowStack,   /// () -> void
    Exit,           /// () -> void
    Discard,        /// () -> void
    EmitVertex,   /// () -> void
    EndPrimitive, /// () -> void
    InvocationId,       /// () -> int
    YNegate,            /// () -> float
    LocalInvocationIdX, /// () -> uint
    LocalInvocationIdY, /// () -> uint
    LocalInvocationIdZ, /// () -> uint
    WorkGroupIdX,       /// () -> uint
    WorkGroupIdY,       /// () -> uint
    WorkGroupIdZ,       /// () -> uint
    BallotThread, /// (bool) -> uint
    VoteAll,      /// (bool) -> bool
    VoteAny,      /// (bool) -> bool
    VoteEqual,    /// (bool) -> bool
    ThreadId,       /// () -> uint
    ThreadEqMask,   /// () -> uint
    ThreadGeMask,   /// () -> uint
    ThreadGtMask,   /// () -> uint
    ThreadLeMask,   /// () -> uint
    ThreadLtMask,   /// () -> uint
    ShuffleIndexed, /// (uint value, uint index) -> uint
    Barrier,             /// () -> void
    MemoryBarrierGroup,  /// () -> void
    MemoryBarrierGlobal, /// () -> void
    Amount,
 };
 /// Identifies one of the internal flags wrapped by InternalFlagNode.
 enum class InternalFlag {
    Zero = 0,
    Sign = 1,
    Carry = 2,
    Overflow = 3,
    Amount = 4, ///< Not a flag: equals the number of flags declared above
 };
 /// Stack class selector carried as operation metadata (it is one of the Meta alternatives).
 /// NOTE(review): Ssy/Pbk presumably name the flow stacks of the SSY and PBK instructions;
 /// confirm against the users of PushFlowStack/PopFlowStack.
 enum class MetaStackClass {
    Ssy,
    Pbk,
 };
 // Forward declarations of every alternative held by NodeData; the classes are defined
 // further down in this header.
 class OperationNode;
 class ConditionalNode;
 class GprNode;
 class CustomVarNode;
 class ImmediateNode;
 class InternalFlagNode;
 class PredicateNode;
 class AbufNode;
 class CbufNode;
 class LmemNode;
 class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;
 /// Sum type of all IR node kinds
 using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
                               LmemNode, SmemNode, GmemNode, CommentNode>;
 /// Shared handle to a node; a default-constructed (null) Node holds no node
 using Node = std::shared_ptr<NodeData>;
 /// Fixed group of four nodes
 using Node4 = std::array<Node, 4>;
 /// Sequence of nodes
 using NodeBlock = std::vector<Node>;
 // Forward declarations of the alternatives held by TrackSamplerData
 struct ArraySamplerNode;
 struct BindlessSamplerNode;
 struct SeparateSamplerNode;
 /// Sum type of the tracked sampler descriptions
 using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
 /// Shared handle to a tracked sampler description
 using TrackSampler = std::shared_ptr<TrackSamplerData>;
 /// Describes a sampler used by a shader. Three constructors cover bound, separate and
 /// bindless samplers; members a constructor does not set keep their in-class defaults.
 struct SamplerEntry {
    /// Bound samplers constructor
    explicit SamplerEntry(u32 index_, u32 offset_, Tegra::Shader::TextureType type_, bool is_array_,
                          bool is_shadow_, bool is_buffer_, bool is_indexed_)
        : index{index_}, offset{offset_}, type{type_}, is_array{is_array_}, is_shadow{is_shadow_},
          is_buffer{is_buffer_}, is_indexed{is_indexed_} {}
    /// Separate sampler constructor (marks the entry as separated; each pair's first element
    /// fills the primary member and its second element the secondary one)
    explicit SamplerEntry(u32 index_, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
                          Tegra::Shader::TextureType type_, bool is_array_, bool is_shadow_,
                          bool is_buffer_)
        : index{index_}, offset{offsets.first}, secondary_offset{offsets.second},
          buffer{buffers.first}, secondary_buffer{buffers.second}, type{type_}, is_array{is_array_},
          is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_separated{true} {}
    /// Bindless samplers constructor (marks the entry as bindless)
    explicit SamplerEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::TextureType type_,
                          bool is_array_, bool is_shadow_, bool is_buffer_, bool is_indexed_)
        : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_array{is_array_},
          is_shadow{is_shadow_}, is_buffer{is_buffer_}, is_bindless{true}, is_indexed{is_indexed_} {
    }
    u32 index = 0;            ///< Emulated index given to this sampler.
    u32 offset = 0;           ///< Offset in the const buffer from where the sampler is being read.
    u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
    u32 buffer = 0;           ///< Buffer where the bindless sampler is read.
    u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
    u32 size = 1;             ///< Size of the sampler.
    Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
    bool is_array = false;     ///< Whether the texture is being sampled as an array texture or not.
    bool is_shadow = false;    ///< Whether the texture is being sampled as a depth texture or not.
    bool is_buffer = false;    ///< Whether the texture is a texture buffer without sampler.
    bool is_bindless = false;  ///< Whether this sampler belongs to a bindless texture or not.
    bool is_indexed = false;   ///< Whether this sampler is an indexed array of textures.
    bool is_separated = false; ///< Whether the image and sampler are separated or not.
 };
 /// Represents a tracked array sampler.
 /// NOTE(review): the previous comment was identical to BindlessSamplerNode's; the field
 /// meanings below are inferred from their names and should be confirmed.
 struct ArraySamplerNode {
    u32 index;        // presumably the const buffer index
    u32 base_offset;  // presumably the offset of the first array element
    u32 bindless_var; // presumably the custom variable holding the dynamic index
 };
 /// Represents a tracked separate sampler image pair that was folded statically.
 /// Each member holds one value per element of the pair.
 struct SeparateSamplerNode {
    std::pair<u32, u32> indices; // presumably const buffer indices; confirm against users
    std::pair<u32, u32> offsets; // presumably offsets inside those buffers; confirm against users
 };
 /// Represents a tracked bindless sampler into a direct const buffer
 struct BindlessSamplerNode {
    u32 index;  // presumably the const buffer index; confirm against users
    u32 offset; // presumably the offset inside that buffer; confirm against users
 };
 struct ImageEntry {
 public:
    /// Bound images constructor
    explicit ImageEntry(u32 index_, u32 offset_, Tegra::Shader::ImageType type_)
        : index{index_}, offset{offset_}, type{type_} {}
    /// Bindless samplers constructor
    explicit ImageEntry(u32 index_, u32 offset_, u32 buffer_, Tegra::Shader::ImageType type_)
        : index{index_}, offset{offset_}, buffer{buffer_}, type{type_}, is_bindless{true} {}
    void MarkWrite() {
        is_written = true;
    }
    void MarkRead() {
        is_read = true;
    }
    void MarkAtomic() {
        MarkWrite();
        MarkRead();
        is_atomic = true;
    }
    u32 index = 0;
    u32 offset = 0;
    u32 buffer = 0;
    Tegra::Shader::ImageType type{};
    bool is_bindless = false;
    bool is_written = false;
    bool is_read = false;
    bool is_atomic = false;
 };
 /// Const buffer location (index and offset) used to describe a global memory region.
 struct GlobalMemoryBase {
    u32 cbuf_index = 0;
    u32 cbuf_offset = 0;

    /// Lexicographic ordering: by const buffer index first, then by offset.
    [[nodiscard]] bool operator<(const GlobalMemoryBase& rhs) const {
        if (cbuf_index != rhs.cbuf_index) {
            return cbuf_index < rhs.cbuf_index;
        }
        return cbuf_offset < rhs.cbuf_offset;
    }
 };
 /// Parameters describing an arithmetic operation
 struct MetaArithmetic {
    /// Precision flag, false by default.
    /// NOTE(review): the original wording ("can be constraint") is ambiguous; presumably a
    /// precise operation must not be optimized or reordered by the backend. Confirm.
    bool precise{};
 };
 /// Parameters describing a texture sampler operation. Node members are shared pointers and
 /// may be null; presumably null means the parameter is not used by the operation (confirm
 /// against the code emitting texture operations).
 struct MetaTexture {
    SamplerEntry sampler;        ///< Sampler the operation refers to
    Node array;
    Node depth_compare;
    std::vector<Node> aoffi;
    std::vector<Node> ptp;
    std::vector<Node> derivates; ///< See OperationCode::TextureGradient
    Node bias;
    Node lod;
    Node component;
    u32 element{};
    Node index;
 };
 /// Parameters describing an image operation.
 struct MetaImage {
    /// Held by reference: the referenced ImageEntry has to outlive this object.
    const ImageEntry& image;
    std::vector<Node> values;
    u32 element{};
 };
 /// Parameters that modify an operation but are not part of any particular operand.
 /// A default-constructed Meta holds the first alternative, a MetaArithmetic with precise == false.
 using Meta =
    std::variant<MetaArithmetic, MetaTexture, MetaImage, MetaStackClass, Tegra::Shader::HalfType>;
 /// Base class storing an optional "amend" index. The largest std::size_t value is reserved
 /// as the marker for "no index", which is also the initial state.
 class AmendNode {
 public:
    /// Returns the stored index, or std::nullopt when none is set.
    [[nodiscard]] std::optional<std::size_t> GetAmendIndex() const {
        if (amend_index != amend_null_index) {
            return amend_index;
        }
        return std::nullopt;
    }

    /// Stores `index`; storing the reserved marker value is the same as clearing.
    void SetAmendIndex(std::size_t index) {
        amend_index = index;
    }

    /// Returns to the "no index" state.
    void ClearAmend() {
        amend_index = amend_null_index;
    }

 private:
    static constexpr std::size_t amend_null_index = ~std::size_t{0};
    std::size_t amend_index = amend_null_index;
 };
 /// Holds any kind of operation that can be done in the IR: an operation code, its metadata
 /// and the list of operand nodes.
 class OperationNode final : public AmendNode {
 public:
    /// Operation without operands and with default metadata
    explicit OperationNode(OperationCode code_) : OperationNode(code_, Meta{}) {}
    /// Operation without operands
    explicit OperationNode(OperationCode code_, Meta meta_)
        : OperationNode(code_, std::move(meta_), std::vector<Node>{}) {}
    /// Operation with default metadata
    explicit OperationNode(OperationCode code_, std::vector<Node> operands_)
        : OperationNode(code_, Meta{}, std::move(operands_)) {}
    /// Fully specified operation; the other non-template constructors delegate here
    explicit OperationNode(OperationCode code_, Meta meta_, std::vector<Node> operands_)
        : code{code_}, meta{std::move(meta_)}, operands{std::move(operands_)} {}
    /// Operation whose operands are passed as individual arguments. The arguments are
    /// list-initialized into the operand vector, so each operand is copied, not moved.
    template <typename... Args>
    explicit OperationNode(OperationCode code_, Meta meta_, Args&&... operands_)
        : code{code_}, meta{std::move(meta_)}, operands{operands_...} {}
    [[nodiscard]] OperationCode GetCode() const {
        return code;
    }
    [[nodiscard]] const Meta& GetMeta() const {
        return meta;
    }
    [[nodiscard]] std::size_t GetOperandsCount() const {
        return operands.size();
    }
    /// Bounds-checked operand access: throws std::out_of_range for an invalid index
    [[nodiscard]] const Node& operator[](std::size_t operand_index) const {
        return operands.at(operand_index);
    }
 private:
    OperationCode code{};
    Meta meta{};
    std::vector<Node> operands;
 };
 /// Encloses inside any kind of node that returns a boolean conditionally-executed code
 class ConditionalNode final : public AmendNode {
 public:
    explicit ConditionalNode(Node condition_, std::vector<Node>&& code_)
        : condition{std::move(condition_)}, code{std::move(code_)} {}
    [[nodiscard]] const Node& GetCondition() const {
        return condition;
    }
    [[nodiscard]] const std::vector<Node>& GetCode() const {
        return code;
    }
 private:
    Node condition;         ///< Condition to be satisfied
    std::vector<Node> code; ///< Code to execute
 };
 /// A general purpose register
 class GprNode final {
 public:
    explicit constexpr GprNode(Tegra::Shader::Register index_) : index{index_} {}
    [[nodiscard]] constexpr u32 GetIndex() const {
        return static_cast<u32>(index);
    }
 private:
    Tegra::Shader::Register index{};
 };
 /// A custom variable
 class CustomVarNode final {
 public:
    explicit constexpr CustomVarNode(u32 index_) : index{index_} {}
    [[nodiscard]] constexpr u32 GetIndex() const {
        return index;
    }
 private:
    u32 index{};
 };
 /// A 32-bits value that represents an immediate value
 class ImmediateNode final {
 public:
    explicit constexpr ImmediateNode(u32 value_) : value{value_} {}
    [[nodiscard]] constexpr u32 GetValue() const {
        return value;
    }
 private:
    u32 value{};
 };
 /// One of Maxwell's internal flags
 class InternalFlagNode final {
 public:
    explicit constexpr InternalFlagNode(InternalFlag flag_) : flag{flag_} {}
    [[nodiscard]] constexpr InternalFlag GetFlag() const {
        return flag;
    }
 private:
    InternalFlag flag{};
 };
 /// A predicate register, it can be negated without additional nodes
 class PredicateNode final {
 public:
    explicit constexpr PredicateNode(Tegra::Shader::Pred index_, bool negated_)
        : index{index_}, negated{negated_} {}
    [[nodiscard]] constexpr Tegra::Shader::Pred GetIndex() const {
        return index;
    }
    [[nodiscard]] constexpr bool IsNegated() const {
        return negated;
    }
 private:
    Tegra::Shader::Pred index{};
    bool negated{};
 };
 /// Attribute buffer memory (known as attributes or varyings in GLSL terms)
 class AbufNode final {
 public:
    // Initialize for standard attributes (index is explicit).
    explicit AbufNode(Tegra::Shader::Attribute::Index index_, u32 element_, Node buffer_ = {})
        : buffer{std::move(buffer_)}, index{index_}, element{element_} {}
    // Initialize for physical attributes (index is a variable value).
    explicit AbufNode(Node physical_address_, Node buffer_ = {})
        : physical_address{std::move(physical_address_)}, buffer{std::move(buffer_)} {}
    [[nodiscard]] Tegra::Shader::Attribute::Index GetIndex() const {
        return index;
    }
    [[nodiscard]] u32 GetElement() const {
        return element;
    }
    [[nodiscard]] const Node& GetBuffer() const {
        return buffer;
    }
    [[nodiscard]] bool IsPhysicalBuffer() const {
        return static_cast<bool>(physical_address);
    }
    [[nodiscard]] const Node& GetPhysicalAddress() const {
        return physical_address;
    }
 private:
    Node physical_address;
    Node buffer;
    Tegra::Shader::Attribute::Index index{};
    u32 element{};
 };
 /// Patch memory (used to communicate tessellation stages).
 class PatchNode final {
 public:
    explicit constexpr PatchNode(u32 offset_) : offset{offset_} {}
    [[nodiscard]] constexpr u32 GetOffset() const {
        return offset;
    }
 private:
    u32 offset{};
 };
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
    explicit CbufNode(u32 index_, Node offset_) : index{index_}, offset{std::move(offset_)} {}
    [[nodiscard]] u32 GetIndex() const {
        return index;
    }
    [[nodiscard]] const Node& GetOffset() const {
        return offset;
    }
 private:
    u32 index{};
    Node offset;
 };
 /// Local memory node
 class LmemNode final {
 public:
    explicit LmemNode(Node address_) : address{std::move(address_)} {}
    [[nodiscard]] const Node& GetAddress() const {
        return address;
    }
 private:
    Node address;
 };
 /// Shared memory node
 class SmemNode final {
 public:
    explicit SmemNode(Node address_) : address{std::move(address_)} {}
    [[nodiscard]] const Node& GetAddress() const {
        return address;
    }
 private:
    Node address;
 };
 /// Global memory node
 class GmemNode final {
 public:
    explicit GmemNode(Node real_address_, Node base_address_, const GlobalMemoryBase& descriptor_)
        : real_address{std::move(real_address_)}, base_address{std::move(base_address_)},
          descriptor{descriptor_} {}
    [[nodiscard]] const Node& GetRealAddress() const {
        return real_address;
    }
    [[nodiscard]] const Node& GetBaseAddress() const {
        return base_address;
    }
    [[nodiscard]] const GlobalMemoryBase& GetDescriptor() const {
        return descriptor;
    }
 private:
    Node real_address;
    Node base_address;
    GlobalMemoryBase descriptor;
 };
 /// Commentary node carrying free-form text; it can be dropped.
 class CommentNode final {
 public:
    explicit CommentNode(std::string comment) : text{std::move(comment)} {}

    /// Text given at construction.
    [[nodiscard]] const std::string& GetText() const {
        return text;
    }

 private:
    std::string text;
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/node_helper.cpp
+++ b/src/video_core/shader/node_helper.cpp
@ -1,115 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <cstring>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 /// Wraps `code` in a ConditionalNode guarded by `condition`.
 Node Conditional(Node condition, std::vector<Node> code) {
    Node conditional = MakeNode<ConditionalNode>(std::move(condition), std::move(code));
    return conditional;
 }
 /// Wraps `text` in a CommentNode.
 Node Comment(std::string text) {
    Node comment = MakeNode<CommentNode>(std::move(text));
    return comment;
 }
 /// Wraps an unsigned 32-bit value in an ImmediateNode.
 Node Immediate(u32 value) {
    Node immediate = MakeNode<ImmediateNode>(value);
    return immediate;
 }
 /// Signed overload: converts the value to u32 and forwards to the unsigned overload.
 Node Immediate(s32 value) {
    const auto as_unsigned = static_cast<u32>(value);
    return Immediate(as_unsigned);
 }
 /// Float overload: copies the float's bytes into a u32 and forwards to the unsigned overload.
 Node Immediate(f32 value) {
    u32 raw_bits;
    std::memcpy(&raw_bits, &value, sizeof(raw_bits));
    return Immediate(raw_bits);
 }
 /// Maps a signed operation code to its unsigned counterpart.
 ///
 /// @param operation_code Signed operation code to translate.
 /// @param is_signed      When true the code is returned unchanged.
 /// @return The unsigned equivalent of `operation_code`. Codes without an unsigned equivalent
 ///         (INegate, IAbsolute) and codes that are not signed operations hit an unreachable
 ///         assertion and yield a value-initialized OperationCode.
 OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed) {
    if (is_signed) {
        return operation_code;
    }
    switch (operation_code) {
    case OperationCode::FCastInteger:
        return OperationCode::FCastUInteger;
    case OperationCode::IAdd:
        return OperationCode::UAdd;
    case OperationCode::IMul:
        return OperationCode::UMul;
    case OperationCode::IDiv:
        return OperationCode::UDiv;
    case OperationCode::IMin:
        return OperationCode::UMin;
    case OperationCode::IMax:
        return OperationCode::UMax;
    case OperationCode::ICastFloat:
        return OperationCode::UCastFloat;
    case OperationCode::ICastUnsigned:
        return OperationCode::UCastSigned;
    case OperationCode::ILogicalShiftLeft:
        return OperationCode::ULogicalShiftLeft;
    case OperationCode::ILogicalShiftRight:
        return OperationCode::ULogicalShiftRight;
    case OperationCode::IArithmeticShiftRight:
        return OperationCode::UArithmeticShiftRight;
    case OperationCode::IBitwiseAnd:
        return OperationCode::UBitwiseAnd;
    case OperationCode::IBitwiseOr:
        return OperationCode::UBitwiseOr;
    case OperationCode::IBitwiseXor:
        return OperationCode::UBitwiseXor;
    case OperationCode::IBitwiseNot:
        return OperationCode::UBitwiseNot;
    case OperationCode::IBitfieldExtract:
        return OperationCode::UBitfieldExtract;
    case OperationCode::IBitfieldInsert:
        return OperationCode::UBitfieldInsert;
    case OperationCode::IBitCount:
        return OperationCode::UBitCount;
    // IBitMSB has an unsigned counterpart in OperationCode just like IBitCount does
    case OperationCode::IBitMSB:
        return OperationCode::UBitMSB;
    case OperationCode::LogicalILessThan:
        return OperationCode::LogicalULessThan;
    case OperationCode::LogicalIEqual:
        return OperationCode::LogicalUEqual;
    case OperationCode::LogicalILessEqual:
        return OperationCode::LogicalULessEqual;
    case OperationCode::LogicalIGreaterThan:
        return OperationCode::LogicalUGreaterThan;
    case OperationCode::LogicalINotEqual:
        return OperationCode::LogicalUNotEqual;
    case OperationCode::LogicalIGreaterEqual:
        return OperationCode::LogicalUGreaterEqual;
    case OperationCode::AtomicIExchange:
        return OperationCode::AtomicUExchange;
    case OperationCode::AtomicIAdd:
        return OperationCode::AtomicUAdd;
    case OperationCode::AtomicIMin:
        return OperationCode::AtomicUMin;
    case OperationCode::AtomicIMax:
        return OperationCode::AtomicUMax;
    case OperationCode::AtomicIAnd:
        return OperationCode::AtomicUAnd;
    case OperationCode::AtomicIOr:
        return OperationCode::AtomicUOr;
    case OperationCode::AtomicIXor:
        return OperationCode::AtomicUXor;
    case OperationCode::INegate:
        UNREACHABLE_MSG("Can't negate an unsigned integer");
        return {};
    case OperationCode::IAbsolute:
        UNREACHABLE_MSG("Can't apply absolute to an unsigned integer");
        return {};
    default:
        UNREACHABLE_MSG("Unknown signed operation with code={}", operation_code);
        return {};
    }
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@ -1,71 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <memory>
 #include <string>
 #include <tuple>
 #include <type_traits>
 #include <utility>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/shader/node.h"
 namespace VideoCommon::Shader {
 /// Arithmetic metadata constructed from `true`.
 /// NOTE(review): the original comment read "This arithmetic operation cannot be constraint";
 /// presumably the operation must be evaluated exactly as written - confirm against MetaArithmetic.
 inline constexpr MetaArithmetic PRECISE = {true};
 /// Arithmetic metadata constructed from `false`: this arithmetic operation can be optimized away
 inline constexpr MetaArithmetic NO_PRECISE = {false};
 /// Creates a conditional node from a condition node and a list of code nodes
 Node Conditional(Node condition, std::vector<Node> code);
 /// Creates a commentary node holding the given text
 Node Comment(std::string text);
 /// Creates a u32 immediate
 Node Immediate(u32 value);
 /// Creates an s32 immediate
 Node Immediate(s32 value);
 /// Creates an f32 immediate
 Node Immediate(f32 value);
 /// Converts a signed operation code to an unsigned operation code.
 /// NOTE(review): `is_signed` presumably leaves the code untouched when true - confirm against the
 /// definition.
 OperationCode SignedToUnsignedCode(OperationCode operation_code, bool is_signed);
 /// Constructs a T from the forwarded arguments and stores it in a freshly allocated, shared
 /// NodeData. T must be convertible to NodeData (checked at compile time).
 template <typename T, typename... Args>
 Node MakeNode(Args&&... args) {
    static_assert(std::is_convertible_v<T, NodeData>);
    // Build the concrete node first, then hand it over to the shared NodeData.
    auto payload = T(std::forward<Args>(args)...);
    return std::make_shared<NodeData>(std::move(payload));
 }
 /// Brace-constructs a T from the forwarded arguments and stores it in a freshly allocated, shared
 /// TrackSamplerData. T must be convertible to TrackSamplerData (checked at compile time).
 template <typename T, typename... Args>
 TrackSampler MakeTrackSampler(Args&&... args) {
    static_assert(std::is_convertible_v<T, TrackSamplerData>);
    // Build the concrete tracking record first, then hand it over to the shared wrapper.
    auto payload = T{std::forward<Args>(args)...};
    return std::make_shared<TrackSamplerData>(std::move(payload));
 }
 /// Creates an OperationNode for `code`.
 /// - With no extra arguments, only the code is passed on.
 /// - If the first extra argument is convertible to Meta, all arguments are forwarded unchanged.
 /// - Otherwise a default-constructed Meta is inserted ahead of the forwarded arguments.
 template <typename... Args>
 Node Operation(OperationCode code, Args&&... args) {
    if constexpr (sizeof...(Args) == 0) {
        return MakeNode<OperationNode>(code);
    } else {
        using FirstArg = std::tuple_element_t<0, std::tuple<Args...>>;
        if constexpr (std::is_convertible_v<FirstArg, Meta>) {
            // The caller supplied its own metadata.
            return MakeNode<OperationNode>(code, std::forward<Args>(args)...);
        } else {
            // No metadata supplied: fall back to a default Meta.
            return MakeNode<OperationNode>(code, Meta{}, std::forward<Args>(args)...);
        }
    }
 }
 /// Creates an operation node after passing `code` and `is_signed` through SignedToUnsignedCode;
 /// the remaining arguments are forwarded to Operation unchanged.
 template <typename... Args>
 Node SignedOperation(OperationCode code, bool is_signed, Args&&... args) {
    const OperationCode resolved_code = SignedToUnsignedCode(code, is_signed);
    return Operation(resolved_code, std::forward<Args>(args)...);
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/registry.cpp
+++ b/src/video_core/shader/registry.cpp
@ -1,181 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <tuple>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/shader/registry.h"
 namespace VideoCommon::Shader {
 using Tegra::Engines::ConstBufferEngineInterface;
 using Tegra::Engines::SamplerDescriptor;
 using Tegra::Engines::ShaderType;
 namespace {
 /// Snapshots the Maxwell3D registers stored in GraphicsInfo.
 /// For the compute stage a default-constructed GraphicsInfo is returned and the engine is not
 /// touched. For any other stage `engine` is cast to Maxwell3D; dynamic_cast on a reference throws
 /// std::bad_cast if the engine is of a different type.
 GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
    if (shader_stage != ShaderType::Compute) {
        auto& regs = dynamic_cast<Tegra::Engines::Maxwell3D&>(engine).regs;
        return GraphicsInfo{
            .tfb_layouts = regs.tfb_layouts,
            .tfb_varying_locs = regs.tfb_varying_locs,
            .primitive_topology = regs.draw.topology,
            .tessellation_primitive = regs.tess_mode.prim,
            .tessellation_spacing = regs.tess_mode.spacing,
            .tfb_enabled = regs.tfb_enabled != 0,
            .tessellation_clockwise = regs.tess_mode.cw.Value() != 0,
        };
    }
    return GraphicsInfo{};
 }
 /// Snapshots the compute launch description stored in ComputeInfo.
 /// For non-compute stages a default-constructed ComputeInfo is returned and the engine is not
 /// touched. For the compute stage `engine` is cast to KeplerCompute; dynamic_cast on a reference
 /// throws std::bad_cast if the engine is of a different type.
 ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) {
    if (shader_stage == ShaderType::Compute) {
        const auto& launch =
            dynamic_cast<Tegra::Engines::KeplerCompute&>(engine).launch_description;
        return ComputeInfo{
            .workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z},
            .shared_memory_size_in_words = launch.shared_alloc,
            .local_memory_size_in_words = launch.local_pos_alloc,
        };
    }
    return ComputeInfo{};
 }
 } // Anonymous namespace
 /// Builds a registry from previously serialized information.
 /// The guest driver profile, bound buffer, graphics info and compute info are copied from `info`.
 /// No engine is assigned here, so lookups that miss the stored maps return std::nullopt.
 Registry::Registry(ShaderType shader_stage, const SerializedRegistryInfo& info)
    : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile},
      bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {}
 /// Builds a registry backed by a live engine.
 /// A pointer to `engine_` is stored, the bound buffer is read through GetBoundBuffer(), and the
 /// graphics/compute info are captured through MakeGraphicsInfo / MakeComputeInfo.
 Registry::Registry(ShaderType shader_stage, ConstBufferEngineInterface& engine_)
    : stage{shader_stage}, engine{&engine_}, bound_buffer{engine_.GetBoundBuffer()},
      graphics_info{MakeGraphicsInfo(shader_stage, engine_)}, compute_info{MakeComputeInfo(
                                                                  shader_stage, engine_)} {}
 /// Defaulted destructor, defined out of line in this translation unit.
 Registry::~Registry() = default;
 /// Returns the 32-bit value registered for (buffer, offset).
 /// On a miss the value is read from the engine with AccessConstBuffer32, stored in `keys` and
 /// returned. Without an engine a miss yields std::nullopt.
 std::optional<u32> Registry::ObtainKey(u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = keys.find(location); cached != keys.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
    keys.emplace(location, fetched);
    return fetched;
 }
 /// Returns the sampler descriptor registered for a bound sampler at `offset`.
 /// On a miss the descriptor is read from the engine with AccessBoundSampler, stored in
 /// `bound_samplers` and returned. Without an engine a miss yields std::nullopt.
 std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
    if (const auto cached = bound_samplers.find(offset); cached != bound_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset);
    bound_samplers.emplace(offset, fetched);
    return fetched;
 }
 std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
    std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
    SeparateSamplerKey key;
    key.buffers = buffers;
    key.offsets = offsets;
    const auto iter = separate_samplers.find(key);
    if (iter != separate_samplers.end()) {
        return iter->second;
    }
    if (!engine) {
        return std::nullopt;
    }
    const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
    const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
    const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
    separate_samplers.emplace(key, value);
    return value;
 }
 /// Returns the sampler descriptor registered for a bindless sampler at (buffer, offset).
 /// On a miss the descriptor is read from the engine with AccessBindlessSampler, stored in
 /// `bindless_samplers` and returned. Without an engine a miss yields std::nullopt.
 std::optional<SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, u32 offset) {
    const std::pair<u32, u32> location{buffer, offset};
    if (const auto cached = bindless_samplers.find(location);
        cached != bindless_samplers.end()) {
        return cached->second;
    }
    if (engine == nullptr) {
        return std::nullopt;
    }
    const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset);
    bindless_samplers.emplace(location, fetched);
    return fetched;
 }
 /// Stores `value` for (buffer, offset), replacing any value already registered there.
 void Registry::InsertKey(u32 buffer, u32 offset, u32 value) {
    keys.insert_or_assign(std::pair<u32, u32>{buffer, offset}, value);
 }
 /// Stores `sampler` for the bound sampler at `offset`, replacing any descriptor already there.
 void Registry::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
    const u32 key = offset;
    bound_samplers.insert_or_assign(key, sampler);
 }
 /// Stores `sampler` for the bindless sampler at (buffer, offset), replacing any descriptor
 /// already registered there.
 void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
    bindless_samplers.insert_or_assign(std::pair<u32, u32>{buffer, offset}, sampler);
 }
 bool Registry::IsConsistent() const {
    if (!engine) {
        return true;
    }
    return std::all_of(keys.begin(), keys.end(),
                       [this](const auto& pair) {
                           const auto [cbuf, offset] = pair.first;
                           const auto value = pair.second;
                           return value == engine->AccessConstBuffer32(stage, cbuf, offset);
                       }) &&
           std::all_of(bound_samplers.begin(), bound_samplers.end(),
                       [this](const auto& sampler) {
                           const auto [key, value] = sampler;
                           return value == engine->AccessBoundSampler(stage, key);
                       }) &&
           std::all_of(bindless_samplers.begin(), bindless_samplers.end(),
                       [this](const auto& sampler) {
                           const auto [cbuf, offset] = sampler.first;
                           const auto value = sampler.second;
                           return value == engine->AccessBindlessSampler(stage, cbuf, offset);
                       });
 }
 /// Returns true when the keys, bound samplers and bindless samplers of both registries compare
 /// equal. Separate samplers do not take part in the comparison.
 bool Registry::HasEqualKeys(const Registry& rhs) const {
    return keys == rhs.keys && bound_samplers == rhs.bound_samplers &&
           bindless_samplers == rhs.bindless_samplers;
 }
 /// Returns the stored graphics information. Asserts that this registry is not for the compute
 /// stage.
 const GraphicsInfo& Registry::GetGraphicsInfo() const {
    ASSERT(stage != Tegra::Engines::ShaderType::Compute);
    return graphics_info;
 }
 /// Returns the stored compute information. Asserts that this registry is for the compute stage.
 const ComputeInfo& Registry::GetComputeInfo() const {
    ASSERT(stage == Tegra::Engines::ShaderType::Compute);
    return compute_info;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/registry.h
+++ b/src/video_core/shader/registry.h
@ -1,172 +0,0 @@
 // Copyright 2019 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <array>
 #include <optional>
 #include <type_traits>
 #include <unordered_map>
 #include <utility>
 #include "common/common_types.h"
 #include "common/hash.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 #include "video_core/guest_driver.h"
 namespace VideoCommon::Shader {
 /// Lookup key for a sampler whose handle is assembled from two const buffer reads.
 /// Registry::ObtainSeparateSampler reads (buffers.first, offsets.first) and
 /// (buffers.second, offsets.second) and ORs the two values together.
 struct SeparateSamplerKey {
    std::pair<u32, u32> buffers; ///< Const buffer indices of the first and second handle
    std::pair<u32, u32> offsets; ///< Offsets of the first and second handle in those buffers
 };
 } // namespace VideoCommon::Shader
 namespace std {
 /// Hash for SeparateSamplerKey so it can be used as an unordered_map key.
 /// The four fields are folded in one after another with a hash-combine step. A plain XOR of the
 /// fields would be order-insensitive and self-cancelling (swapping the two buffers, or using
 /// equal values, would collide), which degrades the map into long buckets.
 template <>
 struct hash<VideoCommon::Shader::SeparateSamplerKey> {
    std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
        std::size_t seed = 0;
        // Order-dependent mixing step (same shape as the well-known boost::hash_combine).
        const auto mix = [&seed](u32 field) {
            seed ^= std::hash<u32>{}(field) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        };
        mix(key.buffers.first);
        mix(key.buffers.second);
        mix(key.offsets.first);
        mix(key.offsets.second);
        return seed;
    }
 };
 /// Equality for SeparateSamplerKey: both the buffer pair and the offset pair must match.
 template <>
 struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
    bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
                    const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
        return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
    }
 };
 } // namespace std
 namespace VideoCommon::Shader {
 /// Maps a (const buffer index, offset) pair to the 32-bit value read from that location
 using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
 /// Maps a bound sampler offset to its sampler descriptor
 using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
 /// Maps a SeparateSamplerKey (two buffer/offset locations) to its sampler descriptor
 using SeparateSamplerMap =
    std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
 /// Maps a (const buffer index, offset) pair to the descriptor of a bindless sampler
 using BindlessSamplerMap =
    std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
 /// Graphics pipeline state captured from the Maxwell3D registers when a Registry is created from
 /// a live engine. The static_assert below requires the type to stay trivially copyable with
 /// standard layout.
 struct GraphicsInfo {
    using Maxwell = Tegra::Engines::Maxwell3D::Regs;
    /// Copy of regs.tfb_layouts, one entry per transform feedback buffer
    std::array<Maxwell::TransformFeedbackLayout, Maxwell::NumTransformFeedbackBuffers>
        tfb_layouts{};
    /// Copy of regs.tfb_varying_locs, 128 bytes per transform feedback buffer
    std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> tfb_varying_locs{};
    /// Copy of regs.draw.topology
    Maxwell::PrimitiveTopology primitive_topology{};
    /// Copy of regs.tess_mode.prim
    Maxwell::TessellationPrimitive tessellation_primitive{};
    /// Copy of regs.tess_mode.spacing
    Maxwell::TessellationSpacing tessellation_spacing{};
    /// True when regs.tfb_enabled is non-zero
    bool tfb_enabled = false;
    /// True when regs.tess_mode.cw is non-zero
    bool tessellation_clockwise = false;
 };
 static_assert(std::is_trivially_copyable_v<GraphicsInfo> &&
              std::is_standard_layout_v<GraphicsInfo>);
 /// Compute launch state captured from KeplerCompute's launch description when a Registry is
 /// created from a live engine. The static_assert below requires the type to stay trivially
 /// copyable with standard layout.
 struct ComputeInfo {
    /// Copy of launch.block_dim_x, block_dim_y and block_dim_z, in that order
    std::array<u32, 3> workgroup_size{};
    /// Copy of launch.shared_alloc
    u32 shared_memory_size_in_words = 0;
    /// Copy of launch.local_pos_alloc
    u32 local_memory_size_in_words = 0;
 };
 static_assert(std::is_trivially_copyable_v<ComputeInfo> && std::is_standard_layout_v<ComputeInfo>);
 /// Data needed to rebuild a Registry without a live engine; consumed by the Registry constructor
 /// that takes a SerializedRegistryInfo.
 struct SerializedRegistryInfo {
    VideoCore::GuestDriverProfile guest_driver_profile; ///< Becomes the stored driver profile
    u32 bound_buffer = 0;                               ///< Becomes the registry's bound buffer
    GraphicsInfo graphics;                              ///< Becomes the graphics information
    ComputeInfo compute;                                ///< Becomes the compute information
 };
 /**
 * The Registry is a class used to interface the 3D and compute engines with the shader compiler.
 * With it, the shader can obtain required data from GPU state and store it for disk shader
 * compilation.
 */
 class Registry {
 public:
    /// Creates a registry from serialized information; no engine is attached.
    explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info);
    /// Creates a registry backed by a live engine, capturing its current state.
    explicit Registry(Tegra::Engines::ShaderType shader_stage,
                      Tegra::Engines::ConstBufferEngineInterface& engine_);
    ~Registry();
    /// Retrieves a key from the registry. If it is registered, the registered value is returned;
    /// otherwise it is read from the engine and registered. Returns std::nullopt when the key is
    /// not registered and no engine is attached.
    std::optional<u32> ObtainKey(u32 buffer, u32 offset);
    /// Retrieves a bound sampler, reading it from the engine and registering it on a miss.
    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
    /// Retrieves a sampler whose handle is the OR of two const buffer values, reading them from
    /// the engine and registering the result on a miss.
    std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
        std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
    /// Retrieves a bindless sampler, reading it from the engine and registering it on a miss.
    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
    /// Inserts a key.
    void InsertKey(u32 buffer, u32 offset, u32 value);
    /// Inserts a bound sampler key.
    void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
    /// Inserts a bindless sampler key.
    void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
    /// Checks keys and samplers against engine's current const buffers.
    /// Returns true if they are the same value, false otherwise.
    bool IsConsistent() const;
    /// Returns true if the keys are equal to the other ones in the registry.
    bool HasEqualKeys(const Registry& rhs) const;
    /// Returns graphics information from this shader
    const GraphicsInfo& GetGraphicsInfo() const;
    /// Returns compute information from this shader
    const ComputeInfo& GetComputeInfo() const;
    /// Gives a getter to the const buffer keys in the database.
    const KeyMap& GetKeys() const {
        return keys;
    }
    /// Gets samplers database.
    const BoundSamplerMap& GetBoundSamplers() const {
        return bound_samplers;
    }
    /// Gets bindless samplers database.
    const BindlessSamplerMap& GetBindlessSamplers() const {
        return bindless_samplers;
    }
    /// Gets bound buffer used on this shader
    u32 GetBoundBuffer() const {
        return bound_buffer;
    }
    /// Obtains access to the guest driver's profile: the engine's profile when an engine is
    /// attached, otherwise the profile stored in this registry.
    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() {
        return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile;
    }
 private:
    // Shader stage this registry was created for.
    const Tegra::Engines::ShaderType stage;
    // Profile used when no engine is attached.
    VideoCore::GuestDriverProfile stored_guest_driver_profile;
    // Live engine, or nullptr when built from serialized information.
    Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
    KeyMap keys;
    BoundSamplerMap bound_samplers;
    SeparateSamplerMap separate_samplers;
    BindlessSamplerMap bindless_samplers;
    u32 bound_buffer;
    GraphicsInfo graphics_info;
    ComputeInfo compute_info;
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@ -1,464 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <array>
 #include <cmath>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::IpaMode;
 using Tegra::Shader::Pred;
 using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 /// Stores the program code, main offset, compiler settings and registry, then runs Decode()
 /// followed by PostDecode() as part of construction.
 ShaderIR::ShaderIR(const ProgramCode& program_code_, u32 main_offset_, CompilerSettings settings_,
                   Registry& registry_)
    : program_code{program_code_}, main_offset{main_offset_}, settings{settings_}, registry{
                                                                                       registry_} {
    Decode();
    PostDecode();
 }
 /// Defaulted destructor, defined out of line in this translation unit.
 ShaderIR::~ShaderIR() = default;
 /// Returns a general purpose register node for `reg`. Every register other than
 /// Register::ZeroIndex is also recorded in `used_registers`.
 Node ShaderIR::GetRegister(Register reg) {
    const bool needs_tracking = reg != Register::ZeroIndex;
    if (needs_tracking) {
        used_registers.insert(static_cast<u32>(reg));
    }
    return MakeNode<GprNode>(reg);
 }
 /// Returns a custom variable node referring to the variable with the given id.
 Node ShaderIR::GetCustomVariable(u32 id) {
    Node variable = MakeNode<CustomVarNode>(id);
    return variable;
 }
 /// Returns an immediate node built from the instruction's `alu.GetImm20_19()` value.
 Node ShaderIR::GetImmediate19(Instruction instr) {
    const auto decoded = instr.alu.GetImm20_19();
    return Immediate(decoded);
 }
 /// Returns an immediate node built from the instruction's `alu.GetImm20_32()` value.
 Node ShaderIR::GetImmediate32(Instruction instr) {
    const auto decoded = instr.alu.GetImm20_32();
    return Immediate(decoded);
 }
 /// Returns a const buffer node for a direct access at a fixed offset.
 /// The buffer entry in `used_cbufs` is created if missing and told about the accessed offset.
 Node ShaderIR::GetConstBuffer(u64 index_, u64 offset_) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);
    auto& cbuf_usage = used_cbufs.try_emplace(index).first->second;
    cbuf_usage.MarkAsUsed(offset);
    return MakeNode<CbufNode>(index, Immediate(offset));
 }
 /// Returns a const buffer node whose offset is `node + offset_`.
 /// The buffer entry in `used_cbufs` is created if missing and flagged as indirectly accessed.
 Node ShaderIR::GetConstBufferIndirect(u64 index_, u64 offset_, Node node) {
    const auto index = static_cast<u32>(index_);
    const auto offset = static_cast<u32>(offset_);
    auto& cbuf_usage = used_cbufs.try_emplace(index).first->second;
    cbuf_usage.MarkAsUsedIndirect();
    // Attempt to inline constant buffer without a variable offset. This is done to allow
    // tracking LDC calls.
    const auto* const gpr = std::get_if<GprNode>(&*node);
    const bool base_is_zero_register = gpr != nullptr && gpr->GetIndex() == Register::ZeroIndex;
    Node final_offset;
    if (base_is_zero_register) {
        final_offset = Immediate(offset);
    } else {
        final_offset =
            Operation(OperationCode::UAdd, NO_PRECISE, std::move(node), Immediate(offset));
    }
    return MakeNode<CbufNode>(index, std::move(final_offset));
 }
 /// Returns a predicate node for `pred_`, optionally negated. Predicates other than
 /// Pred::UnusedIndex and Pred::NeverExecute are also recorded in `used_predicates`.
 Node ShaderIR::GetPredicate(u64 pred_, bool negated) {
    const auto pred = static_cast<Pred>(pred_);
    const bool is_constant_predicate = pred == Pred::UnusedIndex || pred == Pred::NeverExecute;
    if (!is_constant_predicate) {
        used_predicates.insert(pred);
    }
    return MakeNode<PredicateNode>(pred, negated);
 }
 /// Returns a constant predicate node: Pred::UnusedIndex for true, Pred::NeverExecute for false.
 Node ShaderIR::GetPredicate(bool immediate) {
    const Pred constant_pred = immediate ? Pred::UnusedIndex : Pred::NeverExecute;
    return GetPredicate(static_cast<u64>(constant_pred));
 }
 /// Returns an attribute buffer node for reading element `element` of input attribute `index`.
 /// The access is reported to MarkAttributeUsage and the index is added to
 /// `used_input_attributes`.
 Node ShaderIR::GetInputAttribute(Attribute::Index index, u64 element, Node buffer) {
    MarkAttributeUsage(index, element);
    used_input_attributes.emplace(index);
    const auto component = static_cast<u32>(element);
    return MakeNode<AbufNode>(index, component, std::move(buffer));
 }
 /// Returns an attribute buffer node addressed through the register `physical_address`, and
 /// records that the shader uses physical attributes.
 Node ShaderIR::GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer) {
    uses_physical_attributes = true;
    Node address = GetRegister(physical_address);
    return MakeNode<AbufNode>(std::move(address), buffer);
 }
 /// Returns an attribute buffer node for element `element` of output attribute `index`.
 /// The access is reported to MarkAttributeUsage and the index is added to
 /// `used_output_attributes`.
 Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buffer) {
    MarkAttributeUsage(index, element);
    used_output_attributes.insert(index);
    const auto component = static_cast<u32>(element);
    return MakeNode<AbufNode>(index, component, std::move(buffer));
 }
 /// Returns a node for the internal flag `flag`; when `negated` is set the node is wrapped in a
 /// LogicalNegate operation.
 Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
    Node flag_node = MakeNode<InternalFlagNode>(flag);
    if (!negated) {
        return flag_node;
    }
    return Operation(OperationCode::LogicalNegate, std::move(flag_node));
 }
 /// Returns a local memory node for the given address node.
 Node ShaderIR::GetLocalMemory(Node address) {
    Node local_memory = MakeNode<LmemNode>(std::move(address));
    return local_memory;
 }
 /// Returns a shared memory node for the given address node.
 Node ShaderIR::GetSharedMemory(Node address) {
    Node shared_memory = MakeNode<SmemNode>(std::move(address));
    return shared_memory;
 }
 /// Returns the register node used as temporary `id`. Temporaries are the registers that follow
 /// Register::ZeroIndex (index ZeroIndex + 1 + id).
 Node ShaderIR::GetTemporary(u32 id) {
    const auto temporary_index = Register::ZeroIndex + 1 + id;
    return GetRegister(temporary_index);
 }
 /// Applies the float operand modifiers to `value`: FAbsolute first (if `absolute`), then FNegate
 /// (if `negate`). Returns `value` untouched when neither flag is set.
 Node ShaderIR::GetOperandAbsNegFloat(Node value, bool absolute, bool negate) {
    const auto wrap_in = [&value](OperationCode code) {
        value = Operation(code, NO_PRECISE, std::move(value));
    };
    if (absolute) {
        wrap_in(OperationCode::FAbsolute);
    }
    if (negate) {
        wrap_in(OperationCode::FNegate);
    }
    return value;
 }
 /// When `saturate` is set, wraps `value` in an FClamp operation bounded by +0.0 and 1.0;
 /// otherwise returns `value` unchanged.
 Node ShaderIR::GetSaturatedFloat(Node value, bool saturate) {
    if (saturate) {
        Node lower_bound = Immediate(std::copysignf(0, 1));
        Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::FClamp, NO_PRECISE, std::move(value),
                         std::move(lower_bound), std::move(upper_bound));
    }
    return value;
 }
 /// Narrows a 32-bit integer node to the requested register size.
 /// Byte and Short values are shifted left and then right again by 24 and 16 bits respectively;
 /// both shifts go through SignedOperation with `is_signed`. Word values are returned unchanged.
 /// Any other size is reported as unreachable and the value is returned unchanged.
 Node ShaderIR::ConvertIntegerSize(Node value, Register::Size size, bool is_signed) {
    // Shift up so the narrow value occupies the top bits, then shift back down.
    const auto shift_round_trip = [is_signed](Node operand, s32 shift) {
        Node shifted_up = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, NO_PRECISE,
                                          std::move(operand), Immediate(shift));
        return SignedOperation(OperationCode::IArithmeticShiftRight, is_signed, NO_PRECISE,
                               std::move(shifted_up), Immediate(shift));
    };
    switch (size) {
    case Register::Size::Byte:
        return shift_round_trip(std::move(value), 24);
    case Register::Size::Short:
        return shift_round_trip(std::move(value), 16);
    case Register::Size::Word:
        // Default - do nothing
        return value;
    default:
        UNREACHABLE_MSG("Unimplemented conversion size: {}", size);
        return value;
    }
 }
 /// Applies the integer operand modifiers to `value`: IAbsolute first (if `absolute`), then
 /// INegate (if `negate`). Unsigned operands are returned untouched.
 Node ShaderIR::GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed) {
    if (is_signed) {
        const auto wrap_in = [&value](OperationCode code) {
            value = Operation(code, NO_PRECISE, std::move(value));
        };
        if (absolute) {
            wrap_in(OperationCode::IAbsolute);
        }
        if (negate) {
            wrap_in(OperationCode::INegate);
        }
    }
    // Absolute or negate on an unsigned is pointless, so that path falls straight through.
    return value;
 }
 /// Returns an immediate node holding the instruction's packed half immediates.
 /// When `has_negation` is set, the immediate is wrapped in an HNegate operation whose two
 /// predicate operands come from the instruction's first_negate / second_negate fields.
 Node ShaderIR::UnpackHalfImmediate(Instruction instr, bool has_negation) {
    Node packed = Immediate(instr.half_imm.PackImmediates());
    if (has_negation) {
        const bool negate_first = instr.half_imm.first_negate != 0;
        const bool negate_second = instr.half_imm.second_negate != 0;
        Node first_predicate = GetPredicate(negate_first);
        Node second_predicate = GetPredicate(negate_second);
        return Operation(OperationCode::HNegate, NO_PRECISE, std::move(packed),
                         std::move(first_predicate), std::move(second_predicate));
    }
    return packed;
 }
 /// Wraps `value` in an HUnpack operation; `type` is passed on as the operation's first extra
 /// argument.
 Node ShaderIR::UnpackHalfFloat(Node value, Tegra::Shader::HalfType type) {
    return Operation(OperationCode::HUnpack, type, std::move(value));
 }
 /// Combines `src` with `dest` according to the half merge mode:
 /// H0_H1 returns `src` as is, F32 wraps `src` in HMergeF32, and Mrg_H0 / Mrg_H1 build
 /// HMergeH0 / HMergeH1 from `dest` and `src`. Any other mode is reported as unreachable and
 /// `src` is returned.
 Node ShaderIR::HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge) {
    if (merge == Tegra::Shader::HalfMerge::H0_H1) {
        return src;
    }
    if (merge == Tegra::Shader::HalfMerge::F32) {
        return Operation(OperationCode::HMergeF32, std::move(src));
    }
    if (merge == Tegra::Shader::HalfMerge::Mrg_H0) {
        return Operation(OperationCode::HMergeH0, std::move(dest), std::move(src));
    }
    if (merge == Tegra::Shader::HalfMerge::Mrg_H1) {
        return Operation(OperationCode::HMergeH1, std::move(dest), std::move(src));
    }
    UNREACHABLE();
    return src;
 }
 /// Applies the half float operand modifiers to `value`: HAbsolute first (if `absolute`), then
 /// HNegate with both predicate operands set to the constant-true predicate (if `negate`).
 Node ShaderIR::GetOperandAbsNegHalf(Node value, bool absolute, bool negate) {
    if (absolute) {
        value = Operation(OperationCode::HAbsolute, NO_PRECISE, std::move(value));
    }
    if (!negate) {
        return value;
    }
    Node negate_first = GetPredicate(true);
    Node negate_second = GetPredicate(true);
    value = Operation(OperationCode::HNegate, NO_PRECISE, std::move(value),
                      std::move(negate_first), std::move(negate_second));
    return value;
 }
 /// When `saturate` is set, wraps `value` in an HClamp operation bounded by +0.0 and 1.0;
 /// otherwise returns `value` unchanged.
 Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
    if (saturate) {
        Node lower_bound = Immediate(std::copysignf(0, 1));
        Node upper_bound = Immediate(1.0f);
        return Operation(OperationCode::HClamp, NO_PRECISE, std::move(value),
                         std::move(lower_bound), std::move(upper_bound));
    }
    return value;
 }
 /// Returns a node comparing two float operands with the given predicate condition.
 /// T and F produce constant predicates without looking at the operands. Every other condition
 /// is used as an index into the table below, so the table order must match the numeric values
 /// of PredCondition.
 Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
    switch (condition) {
    case PredCondition::T:
        return GetPredicate(true);
    case PredCondition::F:
        return GetPredicate(false);
    default:
        break;
    }
    static constexpr std::array comparison_table{
        OperationCode(0),                         // placeholder slot, F is handled above
        OperationCode::LogicalFOrdLessThan,       // LT
        OperationCode::LogicalFOrdEqual,          // EQ
        OperationCode::LogicalFOrdLessEqual,      // LE
        OperationCode::LogicalFOrdGreaterThan,    // GT
        OperationCode::LogicalFOrdNotEqual,       // NE
        OperationCode::LogicalFOrdGreaterEqual,   // GE
        OperationCode::LogicalFOrdered,           // NUM
        OperationCode::LogicalFUnordered,         // NAN
        OperationCode::LogicalFUnordLessThan,     // LTU
        OperationCode::LogicalFUnordEqual,        // EQU
        OperationCode::LogicalFUnordLessEqual,    // LEU
        OperationCode::LogicalFUnordGreaterThan,  // GTU
        OperationCode::LogicalFUnordNotEqual,     // NEU
        OperationCode::LogicalFUnordGreaterEqual, // GEU
    };
    const auto index = static_cast<std::size_t>(condition);
    ASSERT_MSG(index < std::size(comparison_table), "Invalid condition={}", index);
    const OperationCode comparison = comparison_table[index];
    return Operation(comparison, op_a, op_b);
 }
 /// Returns a node comparing two integer operands with the given predicate condition.
 /// The signed comparison code from the table is passed through SignedOperation together with
 /// `is_signed`. Conditions missing from the table are reported as unimplemented and yield a
 /// never-execute predicate instead of dereferencing the end iterator.
 Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                              Node op_b) {
    static constexpr std::array comparison_table{
        std::pair{PredCondition::LT, OperationCode::LogicalILessThan},
        std::pair{PredCondition::EQ, OperationCode::LogicalIEqual},
        std::pair{PredCondition::LE, OperationCode::LogicalILessEqual},
        std::pair{PredCondition::GT, OperationCode::LogicalIGreaterThan},
        std::pair{PredCondition::NE, OperationCode::LogicalINotEqual},
        std::pair{PredCondition::GE, OperationCode::LogicalIGreaterEqual},
    };
    const auto comparison =
        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
                     [condition](const auto entry) { return condition == entry.first; });
    if (comparison == comparison_table.cend()) {
        // Nothing to dereference: report the gap and return the same placeholder that
        // GetConditionCode uses for unimplemented cases.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
    }
    return SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
                           std::move(op_b));
 }
 /// Returns a node comparing two half float operands with the given predicate condition.
 /// Conditions missing from the table are reported as unimplemented and yield a never-execute
 /// predicate instead of dereferencing the end iterator.
 Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
                                           Node op_b) {
    static constexpr std::array comparison_table{
        std::pair{PredCondition::LT, OperationCode::Logical2HLessThan},
        std::pair{PredCondition::EQ, OperationCode::Logical2HEqual},
        std::pair{PredCondition::LE, OperationCode::Logical2HLessEqual},
        std::pair{PredCondition::GT, OperationCode::Logical2HGreaterThan},
        std::pair{PredCondition::NE, OperationCode::Logical2HNotEqual},
        std::pair{PredCondition::GE, OperationCode::Logical2HGreaterEqual},
        std::pair{PredCondition::LTU, OperationCode::Logical2HLessThanWithNan},
        std::pair{PredCondition::LEU, OperationCode::Logical2HLessEqualWithNan},
        std::pair{PredCondition::GTU, OperationCode::Logical2HGreaterThanWithNan},
        std::pair{PredCondition::NEU, OperationCode::Logical2HNotEqualWithNan},
        std::pair{PredCondition::GEU, OperationCode::Logical2HGreaterEqualWithNan},
    };
    const auto comparison =
        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
                     [condition](const auto entry) { return condition == entry.first; });
    if (comparison == comparison_table.cend()) {
        // Nothing to dereference: report the gap and return the same placeholder that
        // GetConditionCode uses for unimplemented cases.
        UNIMPLEMENTED_MSG("Unknown predicate comparison operation");
        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
    }
    return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
 }
 /// Maps a predicate combining operation to its logical operation code by using the numeric
 /// value of `operation` as an index into the table (LogicalAnd, LogicalOr, LogicalXor).
 /// Values beyond the table are reported as unimplemented and a value-initialized OperationCode
 /// is returned.
 OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
    static constexpr std::array operation_table{
        OperationCode::LogicalAnd,
        OperationCode::LogicalOr,
        OperationCode::LogicalXor,
    };
    const auto index = static_cast<std::size_t>(operation);
    if (index < operation_table.size()) {
        return operation_table[index];
    }
    UNIMPLEMENTED_MSG("Unknown predicate operation.");
    return {};
 }
 /// Returns a node evaluating the condition code `cc`.
 /// Only NEU is implemented (the negated Zero internal flag). FCSM_TR and every other code are
 /// reported as unimplemented and yield a never-execute predicate.
 Node ShaderIR::GetConditionCode(ConditionCode cc) const {
    if (cc == ConditionCode::NEU) {
        return GetInternalFlag(InternalFlag::Zero, true);
    }
    if (cc == ConditionCode::FCSM_TR) {
        UNIMPLEMENTED_MSG("EXIT.FCSM_TR is not implemented");
    } else {
        UNIMPLEMENTED_MSG("Unimplemented condition code: {}", cc);
    }
    return MakeNode<PredicateNode>(Pred::NeverExecute, false);
 }
 /// Appends an Assign operation to `bb` that writes `src` into the register `dest`.
 void ShaderIR::SetRegister(NodeBlock& bb, Register dest, Node src) {
    Node target = GetRegister(dest);
    bb.push_back(Operation(OperationCode::Assign, std::move(target), std::move(src)));
 }
 /// Appends a LogicalAssign operation to `bb` that writes `src` into the predicate `dest`.
 void ShaderIR::SetPredicate(NodeBlock& bb, u64 dest, Node src) {
    Node target = GetPredicate(dest);
    bb.push_back(Operation(OperationCode::LogicalAssign, std::move(target), std::move(src)));
 }
 /// Appends a LogicalAssign operation to `bb` that writes `value` into the internal flag `flag`.
 void ShaderIR::SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value) {
    Node target = GetInternalFlag(flag);
    bb.push_back(Operation(OperationCode::LogicalAssign, std::move(target), std::move(value)));
 }
 /// Appends an Assign operation to `bb` that writes `value` to local memory at `address`.
 void ShaderIR::SetLocalMemory(NodeBlock& bb, Node address, Node value) {
    Node target = GetLocalMemory(std::move(address));
    bb.push_back(Operation(OperationCode::Assign, std::move(target), std::move(value)));
 }
 /// Appends an Assign operation to `bb` that writes `value` to shared memory at `address`.
 void ShaderIR::SetSharedMemory(NodeBlock& bb, Node address, Node value) {
    Node target = GetSharedMemory(std::move(address));
    bb.push_back(Operation(OperationCode::Assign, std::move(target), std::move(value)));
 }
 /// Appends an assignment of `value` to temporary `id`, i.e. to the register at index
 /// Register::ZeroIndex + 1 + id.
 void ShaderIR::SetTemporary(NodeBlock& bb, u32 id, Node value) {
    const auto temporary_index = Register::ZeroIndex + 1 + id;
    SetRegister(bb, temporary_index, std::move(value));
 }
 /// When `sets_cc` is set, appends an update of the Zero internal flag (value == 0.0f, ordered
 /// comparison) to `bb` and logs a warning that the other condition codes are not implemented.
 /// Does nothing otherwise.
 void ShaderIR::SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        Node is_zero =
            Operation(OperationCode::LogicalFOrdEqual, std::move(value), Immediate(0.0f));
        SetInternalFlag(bb, InternalFlag::Zero, std::move(is_zero));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
 }
 /// When `sets_cc` is set, appends an update of the Zero internal flag (value == 0) to `bb` and
 /// logs a warning that the other condition codes are not implemented. Does nothing otherwise.
 void ShaderIR::SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc) {
    if (sets_cc) {
        Node is_zero = Operation(OperationCode::LogicalIEqual, std::move(value), Immediate(0));
        SetInternalFlag(bb, InternalFlag::Zero, std::move(is_zero));
        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete");
    }
 }
 /// Returns a UBitfieldExtract operation on `value` with `offset` and `bits` passed as
 /// immediates.
 Node ShaderIR::BitfieldExtract(Node value, u32 offset, u32 bits) {
    Node offset_node = Immediate(offset);
    Node bits_node = Immediate(bits);
    return Operation(OperationCode::UBitfieldExtract, NO_PRECISE, std::move(value),
                     std::move(offset_node), std::move(bits_node));
 }
 /// Returns a UBitfieldInsert operation on `base` and `insert` with `offset` and `bits` passed as
 /// immediates.
 Node ShaderIR::BitfieldInsert(Node base, Node insert, u32 offset, u32 bits) {
    Node offset_node = Immediate(offset);
    Node bits_node = Immediate(bits);
    return Operation(OperationCode::UBitfieldInsert, NO_PRECISE, base, insert,
                     std::move(offset_node), std::move(bits_node));
 }
 /// Records which special attributes the shader touches, based on the attribute index and the
 /// accessed element:
 /// - LayerViewportPointSize: element 1 -> layer, 2 -> viewport index, 3 -> point size
 ///   (element 0 is reported as unimplemented).
 /// - TessCoordInstanceIDVertexID: element 2 -> instance id, 3 -> vertex id.
 /// - ClipDistances0123 / ClipDistances4567: marks clip distance `element` (+4 for the second
 ///   group); `at()` throws std::out_of_range for an index outside `used_clip_distances`.
 /// - Front/back (secondary) colors and TexCoord_0..TexCoord_7: marks legacy varyings as used.
 void ShaderIR::MarkAttributeUsage(Attribute::Index index, u64 element) {
    if (index == Attribute::Index::LayerViewportPointSize) {
        if (element == 0) {
            UNIMPLEMENTED();
        } else if (element == 1) {
            uses_layer = true;
        } else if (element == 2) {
            uses_viewport_index = true;
        } else if (element == 3) {
            uses_point_size = true;
        }
        return;
    }
    if (index == Attribute::Index::TessCoordInstanceIDVertexID) {
        if (element == 2) {
            uses_instance_id = true;
        } else if (element == 3) {
            uses_vertex_id = true;
        }
        return;
    }
    if (index == Attribute::Index::ClipDistances0123 ||
        index == Attribute::Index::ClipDistances4567) {
        const u64 group_base = index == Attribute::Index::ClipDistances4567 ? 4 : 0;
        used_clip_distances.at(group_base + element) = true;
        return;
    }
    const bool is_legacy_color = index == Attribute::Index::FrontColor ||
                                 index == Attribute::Index::FrontSecondaryColor ||
                                 index == Attribute::Index::BackColor ||
                                 index == Attribute::Index::BackSecondaryColor;
    const bool is_legacy_texcoord =
        index >= Attribute::Index::TexCoord_0 && index <= Attribute::Index::TexCoord_7;
    if (is_legacy_color || is_legacy_texcoord) {
        uses_legacy_varyings = true;
    }
 }
 /// Appends `new_amend` to `amend_code` and returns the index it was stored at.
 std::size_t ShaderIR::DeclareAmend(Node new_amend) {
    amend_code.push_back(std::move(new_amend));
    return amend_code.size() - 1;
 }
 /// Returns the current custom variable counter as the new variable's id, then increments the
 /// counter.
 u32 ShaderIR::NewCustomVariable() {
    const auto allocated_id = num_custom_variables;
    ++num_custom_variables;
    return allocated_id;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@ -1,479 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <array>
 #include <list>
 #include <map>
 #include <optional>
 #include <set>
 #include <tuple>
 #include <vector>
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/memory_util.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/registry.h"
 namespace VideoCommon::Shader {
 struct ShaderBlock;
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 struct ConstBuffer {
    constexpr explicit ConstBuffer(u32 max_offset_, bool is_indirect_)
        : max_offset{max_offset_}, is_indirect{is_indirect_} {}
    constexpr ConstBuffer() = default;
    void MarkAsUsed(u64 offset) {
        max_offset = std::max(max_offset, static_cast<u32>(offset));
    }
    void MarkAsUsedIndirect() {
        is_indirect = true;
    }
    bool IsIndirect() const {
        return is_indirect;
    }
    u32 GetSize() const {
        return max_offset + static_cast<u32>(sizeof(float));
    }
    u32 GetMaxOffset() const {
        return max_offset;
    }
 private:
    u32 max_offset = 0;
    bool is_indirect = false;
 };
 /// Read/write flags stored per entry of ShaderIR::used_global_memory.
 /// Both flags are value-initialized, so they start as false.
 struct GlobalMemoryUsage {
    bool is_read{};
    bool is_written{};
 };
 /// Intermediate representation of a shader program.
 /// Owns the node blocks produced for the program together with the bookkeeping of which
 /// resources it uses (registers, predicates, attributes, constant buffers, samplers, images,
 /// clip distances and global memory), all exposed read-only through the getters below.
 class ShaderIR final {
 public:
    explicit ShaderIR(const ProgramCode& program_code_, u32 main_offset_,
                      CompilerSettings settings_, Registry& registry_);
    ~ShaderIR();
    /// Returns the node blocks keyed by a u32 (presumably their start address - TODO confirm).
    const std::map<u32, NodeBlock>& GetBasicBlocks() const {
        return basic_blocks;
    }
    const std::set<u32>& GetRegisters() const {
        return used_registers;
    }
    const std::set<Tegra::Shader::Pred>& GetPredicates() const {
        return used_predicates;
    }
    const std::set<Tegra::Shader::Attribute::Index>& GetInputAttributes() const {
        return used_input_attributes;
    }
    const std::set<Tegra::Shader::Attribute::Index>& GetOutputAttributes() const {
        return used_output_attributes;
    }
    const std::map<u32, ConstBuffer>& GetConstantBuffers() const {
        return used_cbufs;
    }
    const std::list<SamplerEntry>& GetSamplers() const {
        return used_samplers;
    }
    const std::list<ImageEntry>& GetImages() const {
        return used_images;
    }
    const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& GetClipDistances()
        const {
        return used_clip_distances;
    }
    const std::map<GlobalMemoryBase, GlobalMemoryUsage>& GetGlobalMemory() const {
        return used_global_memory;
    }
    /// Returns coverage_end multiplied by sizeof(u64).
    std::size_t GetLength() const {
        return static_cast<std::size_t>(coverage_end * sizeof(u64));
    }
    bool UsesLayer() const {
        return uses_layer;
    }
    bool UsesViewportIndex() const {
        return uses_viewport_index;
    }
    bool UsesPointSize() const {
        return uses_point_size;
    }
    bool UsesInstanceId() const {
        return uses_instance_id;
    }
    bool UsesVertexId() const {
        return uses_vertex_id;
    }
    bool UsesLegacyVaryings() const {
        return uses_legacy_varyings;
    }
    bool UsesYNegate() const {
        return uses_y_negate;
    }
    bool UsesWarps() const {
        return uses_warps;
    }
    bool HasPhysicalAttributes() const {
        return uses_physical_attributes;
    }
    const Tegra::Shader::Header& GetHeader() const {
        return header;
    }
    bool IsFlowStackDisabled() const {
        return disable_flow_stack;
    }
    bool IsDecompiled() const {
        return decompiled;
    }
    const ASTManager& GetASTManager() const {
        return program_manager;
    }
    ASTNode GetASTProgram() const {
        return program_manager.GetProgram();
    }
    u32 GetASTNumVariables() const {
        return program_manager.GetVariables();
    }
    /// Rebases address against main_offset and scales it by the size of an instruction.
    u32 ConvertAddressToNvidiaSpace(u32 address) const {
        return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
    }
    /// Returns a condition code evaluated from internal flags
    Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
    /// Returns the amend node stored at index. The index is not bounds checked.
    const Node& GetAmendNode(std::size_t index) const {
        return amend_code[index];
    }
    /// Returns how many custom variable ids have been handed out so far.
    u32 GetNumCustomVariables() const {
        return num_custom_variables;
    }
 private:
    friend class ASTDecoder;
    /// Sampler properties that may each be unknown; see GetSamplerInfo for filling the gaps.
    struct SamplerInfo {
        std::optional<Tegra::Shader::TextureType> type;
        std::optional<bool> is_array;
        std::optional<bool> is_shadow;
        std::optional<bool> is_buffer;
        /// True once every optional field holds a value.
        constexpr bool IsComplete() const noexcept {
            return type && is_array && is_shadow && is_buffer;
        }
    };
    void Decode();
    void PostDecode();
    NodeBlock DecodeRange(u32 begin, u32 end);
    void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);
    void InsertControlFlow(NodeBlock& bb, const ShaderBlock& block);
    /**
     * Decodes a single instruction from Tegra to IR.
     * @param bb Basic block where the nodes will be written to.
     * @param pc Program counter. Offset to decode.
     * @return Next address to decode.
     */
    u32 DecodeInstr(NodeBlock& bb, u32 pc);
    // Per-category decoders. They share the DecodeInstr signature (basic block to write to and
    // program counter in, u32 out).
    u32 DecodeArithmetic(NodeBlock& bb, u32 pc);
    u32 DecodeArithmeticImmediate(NodeBlock& bb, u32 pc);
    u32 DecodeBfe(NodeBlock& bb, u32 pc);
    u32 DecodeBfi(NodeBlock& bb, u32 pc);
    u32 DecodeShift(NodeBlock& bb, u32 pc);
    u32 DecodeArithmeticInteger(NodeBlock& bb, u32 pc);
    u32 DecodeArithmeticIntegerImmediate(NodeBlock& bb, u32 pc);
    u32 DecodeArithmeticHalf(NodeBlock& bb, u32 pc);
    u32 DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc);
    u32 DecodeFfma(NodeBlock& bb, u32 pc);
    u32 DecodeHfma2(NodeBlock& bb, u32 pc);
    u32 DecodeConversion(NodeBlock& bb, u32 pc);
    u32 DecodeWarp(NodeBlock& bb, u32 pc);
    u32 DecodeMemory(NodeBlock& bb, u32 pc);
    u32 DecodeTexture(NodeBlock& bb, u32 pc);
    u32 DecodeImage(NodeBlock& bb, u32 pc);
    u32 DecodeFloatSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeIntegerSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeHalfSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodePredicateSetRegister(NodeBlock& bb, u32 pc);
    u32 DecodePredicateSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeRegisterSetPredicate(NodeBlock& bb, u32 pc);
    u32 DecodeFloatSet(NodeBlock& bb, u32 pc);
    u32 DecodeIntegerSet(NodeBlock& bb, u32 pc);
    u32 DecodeHalfSet(NodeBlock& bb, u32 pc);
    u32 DecodeVideo(NodeBlock& bb, u32 pc);
    u32 DecodeXmad(NodeBlock& bb, u32 pc);
    u32 DecodeOther(NodeBlock& bb, u32 pc);
    /// Generates a node for a passed register.
    Node GetRegister(Tegra::Shader::Register reg);
    /// Generates a node for a custom variable
    Node GetCustomVariable(u32 id);
    /// Generates a node representing a 19-bit immediate value
    Node GetImmediate19(Tegra::Shader::Instruction instr);
    /// Generates a node representing a 32-bit immediate value
    Node GetImmediate32(Tegra::Shader::Instruction instr);
    /// Generates a node representing a constant buffer
    Node GetConstBuffer(u64 index, u64 offset);
    /// Generates a node representing a constant buffer with a variadic offset
    Node GetConstBufferIndirect(u64 index, u64 offset, Node node);
    /// Generates a node for a passed predicate. It can be optionally negated
    Node GetPredicate(u64 pred, bool negated = false);
    /// Generates a predicate node for an immediate true or false value
    Node GetPredicate(bool immediate);
    /// Generates a node representing an input attribute. Keeps track of used attributes.
    Node GetInputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer = {});
    /// Generates a node representing a physical input attribute.
    Node GetPhysicalInputAttribute(Tegra::Shader::Register physical_address, Node buffer = {});
    /// Generates a node representing an output attribute. Keeps track of used attributes.
    Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
    /// Generates a node representing an internal flag
    Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
    /// Generates a node representing a local memory address
    Node GetLocalMemory(Node address);
    /// Generates a node representing a shared memory address
    Node GetSharedMemory(Node address);
    /// Generates a temporary, internally it uses a post-RZ register
    Node GetTemporary(u32 id);
    /// Sets a register. src value must be a number-evaluated node.
    void SetRegister(NodeBlock& bb, Tegra::Shader::Register dest, Node src);
    /// Sets a predicate. src value must be a bool-evaluated node
    void SetPredicate(NodeBlock& bb, u64 dest, Node src);
    /// Sets an internal flag. src value must be a bool-evaluated node
    void SetInternalFlag(NodeBlock& bb, InternalFlag flag, Node value);
    /// Sets a local memory address with a value.
    void SetLocalMemory(NodeBlock& bb, Node address, Node value);
    /// Sets a shared memory address with a value.
    void SetSharedMemory(NodeBlock& bb, Node address, Node value);
    /// Sets a temporary. Internally it uses a post-RZ register
    void SetTemporary(NodeBlock& bb, u32 id, Node value);
    /// Sets internal flags from a float
    void SetInternalFlagsFromFloat(NodeBlock& bb, Node value, bool sets_cc = true);
    /// Sets internal flags from an integer
    void SetInternalFlagsFromInteger(NodeBlock& bb, Node value, bool sets_cc = true);
    /// Conditionally absolute/negated float. Absolute is applied first
    Node GetOperandAbsNegFloat(Node value, bool absolute, bool negate);
    /// Conditionally saturates a float
    Node GetSaturatedFloat(Node value, bool saturate = true);
    /// Converts an integer to different sizes.
    Node ConvertIntegerSize(Node value, Tegra::Shader::Register::Size size, bool is_signed);
    /// Conditionally absolute/negated integer. Absolute is applied first
    Node GetOperandAbsNegInteger(Node value, bool absolute, bool negate, bool is_signed);
    /// Unpacks a half immediate from an instruction
    Node UnpackHalfImmediate(Tegra::Shader::Instruction instr, bool has_negation);
    /// Unpacks a binary value into a half float pair with a type format
    Node UnpackHalfFloat(Node value, Tegra::Shader::HalfType type);
    /// Merges a half pair into another value
    Node HalfMerge(Node dest, Node src, Tegra::Shader::HalfMerge merge);
    /// Conditionally absolute/negated half float pair. Absolute is applied first
    Node GetOperandAbsNegHalf(Node value, bool absolute, bool negate);
    /// Conditionally saturates a half float pair
    Node GetSaturatedHalfFloat(Node value, bool saturate = true);
    /// Get image component value by type and size
    std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type,
                                            u32 component_size, Node original_value);
    /// Returns a predicate comparing two floats
    Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
    /// Returns a predicate comparing two integers
    Node GetPredicateComparisonInteger(Tegra::Shader::PredCondition condition, bool is_signed,
                                       Node op_a, Node op_b);
    /// Returns a predicate comparing two half floats. meta consumes how both pairs will be compared
    Node GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a, Node op_b);
    /// Returns a predicate combiner operation
    OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
    /// Queries the missing sampler info from the execution context.
    SamplerInfo GetSamplerInfo(SamplerInfo info,
                               std::optional<Tegra::Engines::SamplerDescriptor> sampler);
    /// Accesses a texture sampler.
    std::optional<SamplerEntry> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
    /// Accesses a texture sampler for a bindless texture.
    std::optional<SamplerEntry> GetBindlessSampler(Tegra::Shader::Register reg, SamplerInfo info,
                                                   Node& index_var);
    /// Accesses an image.
    ImageEntry& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
    /// Access a bindless image sampler.
    ImageEntry& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
    /// Extracts a sequence of bits from a node
    Node BitfieldExtract(Node value, u32 offset, u32 bits);
    /// Inserts a sequence of bits from a node
    Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
    /// Marks the usage of a input or output attribute.
    void MarkAttributeUsage(Tegra::Shader::Attribute::Index index, u64 element);
    /// Decodes VMNMX instruction and inserts its code into the passed basic block.
    void DecodeVMNMX(NodeBlock& bb, Tegra::Shader::Instruction instr);
    void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                  const Node4& components);
    void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                   const Node4& components, bool ignore_mask = false);
    void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                       const Node4& components, bool ignore_mask = false);
    Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                     Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                     bool is_array, bool is_aoffi,
                     std::optional<Tegra::Shader::Register> bindless_reg);
    Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
                      bool is_array);
    Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool depth_compare, bool is_array, bool is_aoffi, bool is_ptp,
                      bool is_bindless);
    Node4 GetTldCode(Tegra::Shader::Instruction instr);
    Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                      bool is_array);
    std::tuple<std::size_t, std::size_t> ValidateAndGetCoordinateElement(
        Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
        bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
    std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
    std::vector<Node> GetPtpCoordinates(std::array<Node, 2> ptp_regs);
    Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
                         Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
                         Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
                         std::optional<Tegra::Shader::Register> bindless_reg);
    Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
                         u64 byte_height);
    void WriteLogicOperation(NodeBlock& bb, Tegra::Shader::Register dest,
                             Tegra::Shader::LogicOperation logic_op, Node op_a, Node op_b,
                             Tegra::Shader::PredicateResultMode predicate_mode,
                             Tegra::Shader::Pred predicate, bool sets_cc);
    void WriteLop3Instruction(NodeBlock& bb, Tegra::Shader::Register dest, Node op_a, Node op_b,
                              Node op_c, Node imm_lut, bool sets_cc);
    // Backwards tracking helpers. Each one receives the node block to search and the cursor
    // (index into that block) to start searching from.
    std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
    std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                       s64 cursor);
    std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
                                                             const OperationNode& operation,
                                                             Node gpr, Node base_offset,
                                                             Node tracked, const NodeBlock& code,
                                                             s64 cursor);
    std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
    std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                       s64 cursor) const;
    std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
                                                               Tegra::Shader::Instruction instr,
                                                               bool is_read, bool is_write);
    /// Register new amending code and obtain the reference id.
    std::size_t DeclareAmend(Node new_amend);
    /// Returns a fresh custom variable id and advances the internal counter.
    u32 NewCustomVariable();
    // Construction parameters. program_code and registry are held by reference, so the
    // referenced objects have to outlive this instance.
    const ProgramCode& program_code;
    const u32 main_offset;
    const CompilerSettings settings;
    Registry& registry;
    bool decompiled{};
    bool disable_flow_stack{};
    u32 coverage_begin{};
    u32 coverage_end{};
    std::map<u32, NodeBlock> basic_blocks;
    NodeBlock global_code;
    ASTManager program_manager{true, true};
    std::vector<Node> amend_code;
    u32 num_custom_variables{};
    // Resource usage collected for the program, exposed through the public getters.
    std::set<u32> used_registers;
    std::set<Tegra::Shader::Pred> used_predicates;
    std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
    std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
    std::map<u32, ConstBuffer> used_cbufs;
    std::list<SamplerEntry> used_samplers;
    std::list<ImageEntry> used_images;
    std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
    std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
    bool uses_layer{};
    bool uses_viewport_index{};
    bool uses_point_size{};
    bool uses_physical_attributes{}; // Shader uses AL2P or physical attribute read/writes
    bool uses_instance_id{};
    bool uses_vertex_id{};
    bool uses_legacy_varyings{};
    bool uses_y_negate{};
    bool uses_warps{};
    bool uses_indexed_samplers{};
    Tegra::Shader::Header header;
 };
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@ -1,236 +0,0 @@
 // Copyright 2018 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <utility>
 #include <variant>
 #include "common/common_types.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {
 namespace {
 std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
                                   OperationCode operation_code) {
    for (; cursor >= 0; --cursor) {
        Node node = code.at(cursor);
        if (const auto operation = std::get_if<OperationNode>(&*node)) {
            if (operation->GetCode() == operation_code) {
                return {std::move(node), cursor};
            }
        }
        if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
            const auto& conditional_code = conditional->GetCode();
            auto result = FindOperation(
                conditional_code, static_cast<s64>(conditional_code.size() - 1), operation_code);
            auto& found = result.first;
            if (found) {
                return {std::move(found), cursor};
            }
        }
    }
    return {};
 }
 /// Splits an unsigned addition into its register and immediate operands.
 /// Returns {gpr, offset} when the operation is a UAdd with both a GprNode and an
 /// ImmediateNode operand, std::nullopt otherwise.
 std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
    const bool is_unsigned_add = operation.GetCode() == OperationCode::UAdd;
    if (!is_unsigned_add) {
        return std::nullopt;
    }
    Node gpr;
    Node offset;
    ASSERT(operation.GetOperandsCount() == 2);
    for (std::size_t index = 0; index < operation.GetOperandsCount(); ++index) {
        Node operand = operation[index];
        const bool is_immediate = std::holds_alternative<ImmediateNode>(*operand);
        const bool is_register = !is_immediate && std::holds_alternative<GprNode>(*operand);
        if (is_immediate) {
            offset = std::move(operand);
        } else if (is_register) {
            gpr = std::move(operand);
        }
    }
    if (!offset || !gpr) {
        return std::nullopt;
    }
    return std::make_pair(gpr, offset);
 }
 bool AmendNodeCv(std::size_t amend_index, Node node) {
    if (const auto operation = std::get_if<OperationNode>(&*node)) {
        operation->SetAmendIndex(amend_index);
        return true;
    }
    if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
        conditional->SetAmendIndex(amend_index);
        return true;
    }
    return false;
 }
 } // Anonymous namespace
 /// Tracks where a bindless sampler handle comes from.
 /// @param tracked Node to resolve.
 /// @param code Node block that is searched backwards for register assignments.
 /// @param cursor Index inside code to start searching from.
 /// @return The node the handle was resolved to plus its tracking description, or an empty
 /// pair when the source could not be determined.
 std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                             s64 cursor) {
    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
        const u32 cbuf_index = cbuf->GetIndex();
        // Constant buffer found, test if it's an immediate
        const auto& offset = cbuf->GetOffset();
        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
            return {tracked, track};
        }
        if (const auto operation = std::get_if<OperationNode>(&*offset)) {
            // Computed offsets are only handled for the bound buffer reported by the registry
            const u32 bound_buffer = registry.GetBoundBuffer();
            if (bound_buffer != cbuf_index) {
                return {};
            }
            if (const std::optional pair = DecoupleIndirectRead(*operation)) {
                auto [gpr, base_offset] = *pair;
                return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
                                                  code, cursor);
            }
        }
        return {};
    }
    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return {};
        }
        // Reduce the cursor in one to avoid infinite loops when the instruction sets the same
        // register that it uses as operand
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (!source) {
            return {};
        }
        return TrackBindlessSampler(source, code, new_cursor);
    }
    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
        const OperationNode& op = *operation;
        const OperationCode opcode = operation->GetCode();
        if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
            // A bitwise OR of two constant buffer reads is reported as a separate sampler
            ASSERT(op.GetOperandsCount() == 2);
            auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
            auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
            if (node_a && node_b) {
                auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
                                                                   std::pair{offset_a, offset_b});
                return {tracked, std::move(track)};
            }
        }
        // Visit the operands from last to first. The index runs from count down to 1 and is
        // used as i - 1, so the last operand is included and the index never underflows.
        for (std::size_t i = op.GetOperandsCount(); i > 0; --i) {
            if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
                // Constant buffer found in operand.
                return found;
            }
        }
        return {};
    }
    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
        // NOTE(review): tracked is passed on unchanged, so this call looks like it would reach
        // this same branch again if it were ever taken - confirm whether it is reachable.
        const auto& conditional_code = conditional->GetCode();
        return TrackBindlessSampler(tracked, conditional_code,
                                    static_cast<s64>(conditional_code.size()));
    }
    return {};
 }
 std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
    const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
    const NodeBlock& code, s64 cursor) {
    const auto offset_imm = std::get<ImmediateNode>(*base_offset);
    const auto& gpu_driver = registry.AccessGuestDriverProfile();
    const u32 bindless_cv = NewCustomVariable();
    const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
    Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
    Node cv_node = GetCustomVariable(bindless_cv);
    Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
    const std::size_t amend_index = DeclareAmend(std::move(amend_op));
    AmendNodeCv(amend_index, code[cursor]);
    // TODO: Implement bindless index custom variable
    auto track =
        MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
    return {tracked, track};
 }
 /// Resolves tracked to a constant buffer read with an immediate offset.
 /// Registers are followed back through their assignments and operations are searched operand
 /// by operand, last one first. Returns {node, buffer index, offset}, or an empty tuple when
 /// no such read is found.
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                               s64 cursor) const {
    auto& data = *tracked;

    if (const auto* cbuf = std::get_if<CbufNode>(&data)) {
        // Only constant buffers addressed with an immediate can be tracked
        const auto& offset = cbuf->GetOffset();
        const auto* immediate = std::get_if<ImmediateNode>(&*offset);
        if (immediate == nullptr) {
            return {};
        }
        return {tracked, cbuf->GetIndex(), immediate->GetValue()};
    }

    if (const auto* gpr = std::get_if<GprNode>(&data)) {
        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
            return {};
        }
        // Start one entry earlier so an instruction writing the register it also reads
        // does not make the search loop forever
        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
        if (source) {
            return TrackCbuf(source, code, new_cursor);
        }
        return {};
    }

    if (const auto* operation = std::get_if<OperationNode>(&data)) {
        std::size_t remaining = operation->GetOperandsCount();
        while (remaining > 0) {
            --remaining;
            auto found = TrackCbuf((*operation)[remaining], code, cursor);
            if (std::get<0>(found)) {
                // Cbuf found in operand.
                return found;
            }
        }
        return {};
    }

    if (const auto* conditional = std::get_if<ConditionalNode>(&data)) {
        const auto& conditional_code = conditional->GetCode();
        const s64 conditional_cursor = static_cast<s64>(conditional_code.size());
        return TrackCbuf(tracked, conditional_code, conditional_cursor);
    }

    return {};
 }
 /// Returns the immediate value last assigned to the register held by tracked, or std::nullopt
 /// when no assignment is found or the assigned value is not an immediate.
 /// tracked has to hold a GprNode; std::get throws std::bad_variant_access otherwise.
 std::optional<u32> ShaderIR::TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const {
    const GprNode* const gpr = &std::get<GprNode>(*tracked);
    // Start one entry earlier so an instruction writing the register it also reads does not
    // make the search loop forever
    const Node source = TrackRegister(gpr, code, cursor - 1).first;
    if (source) {
        if (const auto* immediate = std::get_if<ImmediateNode>(&*source)) {
            return immediate->GetValue();
        }
    }
    return std::nullopt;
 }
 /// Searches code backwards from cursor for the most recent assignment to the tracked register.
 /// @param tracked Register to look for. Must not be null.
 /// @param code Node block to search.
 /// @param cursor Index inside code to start searching from.
 /// @return The assigned source node and the cursor of the assignment, or an empty pair when
 /// no assignment to the register is found.
 std::pair<Node, s64> ShaderIR::TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                             s64 cursor) const {
    while (cursor >= 0) {
        const auto [found_node, found_cursor] = FindOperation(code, cursor, OperationCode::Assign);
        if (!found_node) {
            return {};
        }
        const auto operation = std::get_if<OperationNode>(&*found_node);
        ASSERT(operation);
        const auto& target = (*operation)[0];
        if (const auto gpr_target = std::get_if<GprNode>(&*target)) {
            if (gpr_target->GetIndex() == tracked->GetIndex()) {
                return {(*operation)[1], found_cursor};
            }
        }
        // Entries between found_cursor and cursor were already scanned by FindOperation and
        // hold no assignment, so resume right below the assignment that was just rejected
        // instead of finding it again once per skipped entry.
        cursor = found_cursor - 1;
    }
    return {};
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/transform_feedback.cpp
+++ b/src/video_core/shader/transform_feedback.cpp
@ -1,115 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <array>
 #include <unordered_map>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/transform_feedback.h"
 namespace VideoCommon::Shader {
 namespace {
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 // TODO(Rodrigo): Change this to constexpr std::unordered_set in C++20
 /// Attribute offsets that describe a vector.
 /// Every entry is the offset of the first component of a four component attribute, so
 /// consecutive attributes are four apart. The back colors take the two slots that follow
 /// the front colors.
 constexpr std::array VECTORS = {
    28,  // gl_Position
    32,  // Generic 0
    36,  // Generic 1
    40,  // Generic 2
    44,  // Generic 3
    48,  // Generic 4
    52,  // Generic 5
    56,  // Generic 6
    60,  // Generic 7
    64,  // Generic 8
    68,  // Generic 9
    72,  // Generic 10
    76,  // Generic 11
    80,  // Generic 12
    84,  // Generic 13
    88,  // Generic 14
    92,  // Generic 15
    96,  // Generic 16
    100, // Generic 17
    104, // Generic 18
    108, // Generic 19
    112, // Generic 20
    116, // Generic 21
    120, // Generic 22
    124, // Generic 23
    128, // Generic 24
    132, // Generic 25
    136, // Generic 26
    140, // Generic 27
    144, // Generic 28
    148, // Generic 29
    152, // Generic 30
    156, // Generic 31
    160, // gl_FrontColor
    164, // gl_FrontSecondaryColor
    168, // gl_BackColor
    172, // gl_BackSecondaryColor
    192, // gl_TexCoord[0]
    196, // gl_TexCoord[1]
    200, // gl_TexCoord[2]
    204, // gl_TexCoord[3]
    208, // gl_TexCoord[4]
    212, // gl_TexCoord[5]
    216, // gl_TexCoord[6]
    220, // gl_TexCoord[7]
 };
 } // namespace
 std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info) {
    std::unordered_map<u8, VaryingTFB> tfb;
    for (std::size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
        const auto& locations = info.tfb_varying_locs[buffer];
        const auto& layout = info.tfb_layouts[buffer];
        const std::size_t varying_count = layout.varying_count;
        std::size_t highest = 0;
        for (std::size_t offset = 0; offset < varying_count; ++offset) {
            const std::size_t base_offset = offset;
            const u8 location = locations[offset];
            VaryingTFB varying;
            varying.buffer = layout.stream;
            varying.stride = layout.stride;
            varying.offset = offset * sizeof(u32);
            varying.components = 1;
            if (std::find(VECTORS.begin(), VECTORS.end(), location / 4 * 4) != VECTORS.end()) {
                UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");
                const u8 base_index = location / 4;
                while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
                    ++offset;
                    ++varying.components;
                }
            }
            [[maybe_unused]] const bool inserted = tfb.emplace(location, varying).second;
            UNIMPLEMENTED_IF_MSG(!inserted, "Varying already stored");
            highest = std::max(highest, (base_offset + varying.components) * sizeof(u32));
        }
        UNIMPLEMENTED_IF(highest != layout.stride);
    }
    return tfb;
 }
 } // namespace VideoCommon::Shader
--- a/src/video_core/shader/transform_feedback.h
+++ b/src/video_core/shader/transform_feedback.h
@ -1,23 +0,0 @@
 // Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 #pragma once
 #include <unordered_map>
 #include "common/common_types.h"
 #include "video_core/shader/registry.h"
 namespace VideoCommon::Shader {
 /// One transform feedback varying, as produced by BuildTransformFeedback.
 struct VaryingTFB {
    /// Taken from the stream field of the buffer layout.
    std::size_t buffer;
    /// Taken from the stride field of the buffer layout.
    std::size_t stride;
    /// Index of the varying's first location in the buffer, multiplied by sizeof(u32).
    std::size_t offset;
    /// Number of consecutive locations merged into this varying (at least 1).
    std::size_t components;
 };
 std::unordered_map<u8, VaryingTFB> BuildTransformFeedback(const GraphicsInfo& info);
 } // namespace VideoCommon::Shader
		`@ -1 +1 @@`
			`Subproject commit 8188e3fbbc105591064093440f88081fb957d4f0`				`Subproject commit 07c4a37bcf41ea50aef6e98236abdfe8089fb4c6`
		`@ -1 +1 @@`
			`Subproject commit 200310e8faa756b9869dd6dfc902c255246ac74a`				`Subproject commit a39596358a3a5488c06554c0c15184a6af71e433`